1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.zookeeper;
20
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23 import org.apache.hadoop.classification.InterfaceAudience;
24 import org.apache.hadoop.classification.InterfaceStability;
25 import org.apache.hadoop.hbase.HConstants;
26 import org.apache.hadoop.hbase.HRegionInfo;
27 import org.apache.hadoop.hbase.RegionTransition;
28 import org.apache.hadoop.hbase.ServerName;
29 import org.apache.hadoop.hbase.exceptions.DeserializationException;
30 import org.apache.hadoop.hbase.executor.EventType;
31 import org.apache.zookeeper.AsyncCallback;
32 import org.apache.zookeeper.KeeperException;
33 import org.apache.zookeeper.KeeperException.Code;
34 import org.apache.zookeeper.KeeperException.NoNodeException;
35 import org.apache.zookeeper.KeeperException.NodeExistsException;
36 import org.apache.zookeeper.data.Stat;
37
38 import java.util.List;
39
40 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be
41 // about zk and bytes only.
42
43 /**
44 * Utility class for doing region assignment in ZooKeeper. This class extends
45 * stuff done in {@link ZKUtil} to cover specific assignment operations.
46 * <p>
47 * Contains only static methods and constants.
48 * <p>
49 * Used by both the Master and RegionServer.
50 * <p>
51 * All valid transitions outlined below:
52 * <p>
53 * <b>MASTER</b>
54 * <ol>
55 * <li>
56 * Master creates an unassigned node as OFFLINE.
57 * - Cluster startup and table enabling.
58 * </li>
59 * <li>
60 * Master forces an existing unassigned node to OFFLINE.
61 * - RegionServer failure.
62 * - Allows transitions from all states to OFFLINE.
63 * </li>
64 * <li>
65 * Master deletes an unassigned node that was in an OPENED state.
66 * - Normal region transitions. Besides cluster startup, no other deletions
67 * of unassigned nodes are allowed.
68 * </li>
69 * <li>
70 * Master deletes all unassigned nodes regardless of state.
71 * - Cluster startup before any assignment happens.
72 * </li>
73 * </ol>
74 * <p>
75 * <b>REGIONSERVER</b>
76 * <ol>
77 * <li>
78 * RegionServer creates an unassigned node as CLOSING.
79 * - All region closes will do this in response to a CLOSE RPC from Master.
80 * - A node can never be transitioned to CLOSING, only created.
81 * </li>
82 * <li>
83 * RegionServer transitions an unassigned node from CLOSING to CLOSED.
84 * - Normal region closes. CAS operation.
85 * </li>
86 * <li>
87 * RegionServer transitions an unassigned node from OFFLINE to OPENING.
88 * - All region opens will do this in response to an OPEN RPC from the Master.
89 * - Normal region opens. CAS operation.
90 * </li>
91 * <li>
92 * RegionServer transitions an unassigned node from OPENING to OPENED.
93 * - Normal region opens. CAS operation.
94 * </li>
95 * </ol>
96 */
97 @InterfaceAudience.Public
98 @InterfaceStability.Evolving
99 public class ZKAssign {
100 private static final Log LOG = LogFactory.getLog(ZKAssign.class);
101
102 /**
103 * Gets the full path node name for the unassigned node for the specified
104 * region.
105 * @param zkw zk reference
106 * @param regionName region name
107 * @return full path node name
108 */
109 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
110 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
111 }
112
113 /**
114 * Gets the region name from the full path node name of an unassigned node.
115 * @param path full zk path
116 * @return region name
117 */
118 public static String getRegionName(ZooKeeperWatcher zkw, String path) {
119 return path.substring(zkw.assignmentZNode.length()+1);
120 }
121
122 // Master methods
123
124 /**
125 * Creates a new unassigned node in the OFFLINE state for the specified region.
126 *
127 * <p>Does not transition nodes from other states. If a node already exists
128 * for this region, a {@link NodeExistsException} will be thrown.
129 *
130 * <p>Sets a watcher on the unassigned region node if the method is successful.
131 *
132 * <p>This method should only be used during cluster startup and the enabling
133 * of a table.
134 *
135 * @param zkw zk reference
136 * @param region region to be created as offline
137 * @param serverName server transition will happen on
138 * @throws KeeperException if unexpected zookeeper exception
139 * @throws KeeperException.NodeExistsException if node already exists
140 */
141 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
142 ServerName serverName)
143 throws KeeperException, KeeperException.NodeExistsException {
144 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
145 }
146
147 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
148 ServerName serverName, final EventType event)
149 throws KeeperException, KeeperException.NodeExistsException {
150 LOG.debug(zkw.prefix("Creating unassigned node for " +
151 region.getEncodedName() + " in OFFLINE state"));
152 RegionTransition rt =
153 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
154 String node = getNodeName(zkw, region.getEncodedName());
155 ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
156 }
157
158 /**
159 * Creates an unassigned node in the OFFLINE state for the specified region.
160 * <p>
161 * Runs asynchronously. Depends on no pre-existing znode.
162 *
163 * <p>Sets a watcher on the unassigned region node.
164 *
165 * @param zkw zk reference
166 * @param region region to be created as offline
167 * @param serverName server transition will happen on
168 * @param cb
169 * @param ctx
170 * @throws KeeperException if unexpected zookeeper exception
171 * @throws KeeperException.NodeExistsException if node already exists
172 */
173 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
174 HRegionInfo region, ServerName serverName,
175 final AsyncCallback.StringCallback cb, final Object ctx)
176 throws KeeperException {
177 LOG.debug(zkw.prefix("Async create of unassigned node for " +
178 region.getEncodedName() + " with OFFLINE state"));
179 RegionTransition rt =
180 RegionTransition.createRegionTransition(
181 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
182 String node = getNodeName(zkw, region.getEncodedName());
183 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
184 }
185
186 /**
187 * Creates or force updates an unassigned node to the OFFLINE state for the
188 * specified region.
189 * <p>
190 * Attempts to create the node but if it exists will force it to transition to
191 * and OFFLINE state.
192 *
193 * <p>Sets a watcher on the unassigned region node if the method is
194 * successful.
195 *
196 * <p>This method should be used when assigning a region.
197 *
198 * @param zkw zk reference
199 * @param region region to be created as offline
200 * @param serverName server transition will happen on
201 * @return the version of the znode created in OFFLINE state, -1 if
202 * unsuccessful.
203 * @throws KeeperException if unexpected zookeeper exception
204 * @throws KeeperException.NodeExistsException if node already exists
205 */
206 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
207 HRegionInfo region, ServerName serverName) throws KeeperException {
208 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
209 region.getEncodedName() + " with OFFLINE state"));
210 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
211 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
212 byte [] data = rt.toByteArray();
213 String node = getNodeName(zkw, region.getEncodedName());
214 zkw.sync(node);
215 int version = ZKUtil.checkExists(zkw, node);
216 if (version == -1) {
217 return ZKUtil.createAndWatch(zkw, node, data);
218 } else {
219 boolean setData = false;
220 try {
221 setData = ZKUtil.setData(zkw, node, data, version);
222 // Setdata throws KeeperException which aborts the Master. So we are
223 // catching it here.
224 // If just before setting the znode to OFFLINE if the RS has made any
225 // change to the
226 // znode state then we need to return -1.
227 } catch (KeeperException kpe) {
228 LOG.info("Version mismatch while setting the node to OFFLINE state.");
229 return -1;
230 }
231 if (!setData) {
232 return -1;
233 } else {
234 // We successfully forced to OFFLINE, reset watch and handle if
235 // the state changed in between our set and the watch
236 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
237 rt = getRegionTransition(bytes);
238 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
239 // state changed, need to process
240 return -1;
241 }
242 }
243 }
244 return version + 1;
245 }
246
247 /**
248 * Deletes an existing unassigned node that is in the OPENED state for the
249 * specified region.
250 *
251 * <p>If a node does not already exist for this region, a
252 * {@link NoNodeException} will be thrown.
253 *
254 * <p>No watcher is set whether this succeeds or not.
255 *
256 * <p>Returns false if the node was not in the proper state but did exist.
257 *
258 * <p>This method is used during normal region transitions when a region
259 * finishes successfully opening. This is the Master acknowledging completion
260 * of the specified regions transition.
261 *
262 * @param zkw zk reference
263 * @param encodedRegionName opened region to be deleted from zk
264 * @throws KeeperException if unexpected zookeeper exception
265 * @throws KeeperException.NoNodeException if node does not exist
266 */
267 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
268 String encodedRegionName)
269 throws KeeperException, KeeperException.NoNodeException {
270 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_OPENED);
271 }
272
273 /**
274 * Deletes an existing unassigned node that is in the OFFLINE state for the
275 * specified region.
276 *
277 * <p>If a node does not already exist for this region, a
278 * {@link NoNodeException} will be thrown.
279 *
280 * <p>No watcher is set whether this succeeds or not.
281 *
282 * <p>Returns false if the node was not in the proper state but did exist.
283 *
284 * <p>This method is used during master failover when the regions on an RS
285 * that has died are all set to OFFLINE before being processed.
286 *
287 * @param zkw zk reference
288 * @param encodedRegionName closed region to be deleted from zk
289 * @throws KeeperException if unexpected zookeeper exception
290 * @throws KeeperException.NoNodeException if node does not exist
291 */
292 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
293 String encodedRegionName)
294 throws KeeperException, KeeperException.NoNodeException {
295 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_OFFLINE);
296 }
297
298 /**
299 * Deletes an existing unassigned node that is in the CLOSED state for the
300 * specified region.
301 *
302 * <p>If a node does not already exist for this region, a
303 * {@link NoNodeException} will be thrown.
304 *
305 * <p>No watcher is set whether this succeeds or not.
306 *
307 * <p>Returns false if the node was not in the proper state but did exist.
308 *
309 * <p>This method is used during table disables when a region finishes
310 * successfully closing. This is the Master acknowledging completion
311 * of the specified regions transition to being closed.
312 *
313 * @param zkw zk reference
314 * @param encodedRegionName closed region to be deleted from zk
315 * @throws KeeperException if unexpected zookeeper exception
316 * @throws KeeperException.NoNodeException if node does not exist
317 */
318 public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
319 String encodedRegionName)
320 throws KeeperException, KeeperException.NoNodeException {
321 return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_CLOSED);
322 }
323
324 /**
325 * Deletes an existing unassigned node that is in the CLOSING state for the
326 * specified region.
327 *
328 * <p>If a node does not already exist for this region, a
329 * {@link NoNodeException} will be thrown.
330 *
331 * <p>No watcher is set whether this succeeds or not.
332 *
333 * <p>Returns false if the node was not in the proper state but did exist.
334 *
335 * <p>This method is used during table disables when a region finishes
336 * successfully closing. This is the Master acknowledging completion
337 * of the specified regions transition to being closed.
338 *
339 * @param zkw zk reference
340 * @param region closing region to be deleted from zk
341 * @throws KeeperException if unexpected zookeeper exception
342 * @throws KeeperException.NoNodeException if node does not exist
343 */
344 public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
345 HRegionInfo region)
346 throws KeeperException, KeeperException.NoNodeException {
347 String encodedRegionName = region.getEncodedName();
348 return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_CLOSING);
349 }
350
351 /**
352 * Deletes an existing unassigned node that is in the specified state for the
353 * specified region.
354 *
355 * <p>If a node does not already exist for this region, a
356 * {@link NoNodeException} will be thrown.
357 *
358 * <p>No watcher is set whether this succeeds or not.
359 *
360 * <p>Returns false if the node was not in the proper state but did exist.
361 *
362 * <p>This method is used when a region finishes opening/closing.
363 * The Master acknowledges completion
364 * of the specified regions transition to being closed/opened.
365 *
366 * @param zkw zk reference
367 * @param encodedRegionName region to be deleted from zk
368 * @param expectedState state region must be in for delete to complete
369 * @throws KeeperException if unexpected zookeeper exception
370 * @throws KeeperException.NoNodeException if node does not exist
371 */
372 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
373 EventType expectedState)
374 throws KeeperException, KeeperException.NoNodeException {
375 return deleteNode(zkw, encodedRegionName, expectedState, -1);
376 }
377
378 /**
379 * Deletes an existing unassigned node that is in the specified state for the
380 * specified region.
381 *
382 * <p>If a node does not already exist for this region, a
383 * {@link NoNodeException} will be thrown.
384 *
385 * <p>No watcher is set whether this succeeds or not.
386 *
387 * <p>Returns false if the node was not in the proper state but did exist.
388 *
389 * <p>This method is used when a region finishes opening/closing.
390 * The Master acknowledges completion
391 * of the specified regions transition to being closed/opened.
392 *
393 * @param zkw zk reference
394 * @param encodedRegionName region to be deleted from zk
395 * @param expectedState state region must be in for delete to complete
396 * @param expectedVersion of the znode that is to be deleted.
397 * If expectedVersion need not be compared while deleting the znode
398 * pass -1
399 * @throws KeeperException if unexpected zookeeper exception
400 * @throws KeeperException.NoNodeException if node does not exist
401 */
402 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
403 EventType expectedState, int expectedVersion)
404 throws KeeperException, KeeperException.NoNodeException {
405 LOG.debug(zkw.prefix("Deleting existing unassigned " +
406 "node for " + encodedRegionName + " that is in expected state " + expectedState));
407 String node = getNodeName(zkw, encodedRegionName);
408 zkw.sync(node);
409 Stat stat = new Stat();
410 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
411 if (bytes == null) {
412 // If it came back null, node does not exist.
413 throw KeeperException.create(Code.NONODE);
414 }
415 RegionTransition rt = getRegionTransition(bytes);
416 EventType et = rt.getEventType();
417 if (!et.equals(expectedState)) {
418 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
419 expectedState + " state but node is in " + et + " state"));
420 return false;
421 }
422 if (expectedVersion != -1
423 && stat.getVersion() != expectedVersion) {
424 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
425 " the expected one. Got a version mismatch");
426 return false;
427 }
428 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
429 LOG.warn(zkw.prefix("Attempting to delete " +
430 "unassigned node " + encodedRegionName + " in " + expectedState +
431 " state but after verifying state, we got a version mismatch"));
432 return false;
433 }
434 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
435 encodedRegionName + " in expected state " + expectedState));
436 return true;
437 }
438
439 /**
440 * Deletes all unassigned nodes regardless of their state.
441 *
442 * <p>No watchers are set.
443 *
444 * <p>This method is used by the Master during cluster startup to clear out
445 * any existing state from other cluster runs.
446 *
447 * @param zkw zk reference
448 * @throws KeeperException if unexpected zookeeper exception
449 */
450 public static void deleteAllNodes(ZooKeeperWatcher zkw)
451 throws KeeperException {
452 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
453 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
454 }
455
456 /**
457 * Creates a new unassigned node in the CLOSING state for the specified
458 * region.
459 *
460 * <p>Does not transition nodes from any states. If a node already exists
461 * for this region, a {@link NodeExistsException} will be thrown.
462 *
463 * <p>If creation is successful, returns the version number of the CLOSING
464 * node created.
465 *
466 * <p>Set a watch.
467 *
468 * <p>This method should only be used by a Master when initiating a
469 * close of a region before sending a close request to the region server.
470 *
471 * @param zkw zk reference
472 * @param region region to be created as closing
473 * @param serverName server transition will happen on
474 * @return version of node after transition, -1 if unsuccessful transition
475 * @throws KeeperException if unexpected zookeeper exception
476 * @throws KeeperException.NodeExistsException if node already exists
477 */
478 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
479 ServerName serverName)
480 throws KeeperException, KeeperException.NodeExistsException {
481 LOG.debug(zkw.prefix("Creating unassigned node for " +
482 region.getEncodedName() + " in a CLOSING state"));
483 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
484 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
485 String node = getNodeName(zkw, region.getEncodedName());
486 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
487 }
488
489 // RegionServer methods
490
491 /**
492 * Transitions an existing unassigned node for the specified region which is
493 * currently in the CLOSING state to be in the CLOSED state.
494 *
495 * <p>Does not transition nodes from other states. If for some reason the
496 * node could not be transitioned, the method returns -1. If the transition
497 * is successful, the version of the node after transition is returned.
498 *
499 * <p>This method can fail and return false for three different reasons:
500 * <ul><li>Unassigned node for this region does not exist</li>
501 * <li>Unassigned node for this region is not in CLOSING state</li>
502 * <li>After verifying CLOSING state, update fails because of wrong version
503 * (someone else already transitioned the node)</li>
504 * </ul>
505 *
506 * <p>Does not set any watches.
507 *
508 * <p>This method should only be used by a RegionServer when initiating a
509 * close of a region after receiving a CLOSE RPC from the Master.
510 *
511 * @param zkw zk reference
512 * @param region region to be transitioned to closed
513 * @param serverName server transition happens on
514 * @return version of node after transition, -1 if unsuccessful transition
515 * @throws KeeperException if unexpected zookeeper exception
516 */
517 public static int transitionNodeClosed(ZooKeeperWatcher zkw,
518 HRegionInfo region, ServerName serverName, int expectedVersion)
519 throws KeeperException {
520 return transitionNode(zkw, region, serverName,
521 EventType.M_ZK_REGION_CLOSING,
522 EventType.RS_ZK_REGION_CLOSED, expectedVersion);
523 }
524
525 /**
526 * Transitions an existing unassigned node for the specified region which is
527 * currently in the OFFLINE state to be in the OPENING state.
528 *
529 * <p>Does not transition nodes from other states. If for some reason the
530 * node could not be transitioned, the method returns -1. If the transition
531 * is successful, the version of the node written as OPENING is returned.
532 *
533 * <p>This method can fail and return -1 for three different reasons:
534 * <ul><li>Unassigned node for this region does not exist</li>
535 * <li>Unassigned node for this region is not in OFFLINE state</li>
536 * <li>After verifying OFFLINE state, update fails because of wrong version
537 * (someone else already transitioned the node)</li>
538 * </ul>
539 *
540 * <p>Does not set any watches.
541 *
542 * <p>This method should only be used by a RegionServer when initiating an
543 * open of a region after receiving an OPEN RPC from the Master.
544 *
545 * @param zkw zk reference
546 * @param region region to be transitioned to opening
547 * @param serverName server transition happens on
548 * @return version of node after transition, -1 if unsuccessful transition
549 * @throws KeeperException if unexpected zookeeper exception
550 */
551 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
552 HRegionInfo region, ServerName serverName)
553 throws KeeperException {
554 return transitionNodeOpening(zkw, region, serverName,
555 EventType.M_ZK_REGION_OFFLINE);
556 }
557
558 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
559 HRegionInfo region, ServerName serverName, final EventType beginState)
560 throws KeeperException {
561 return transitionNode(zkw, region, serverName, beginState,
562 EventType.RS_ZK_REGION_OPENING, -1);
563 }
564
565 /**
566 * Retransitions an existing unassigned node for the specified region which is
567 * currently in the OPENING state to be in the OPENING state.
568 *
569 * <p>Does not transition nodes from other states. If for some reason the
570 * node could not be transitioned, the method returns -1. If the transition
571 * is successful, the version of the node rewritten as OPENING is returned.
572 *
573 * <p>This method can fail and return -1 for three different reasons:
574 * <ul><li>Unassigned node for this region does not exist</li>
575 * <li>Unassigned node for this region is not in OPENING state</li>
576 * <li>After verifying OPENING state, update fails because of wrong version
577 * (someone else already transitioned the node)</li>
578 * </ul>
579 *
580 * <p>Does not set any watches.
581 *
582 * <p>This method should only be used by a RegionServer when initiating an
583 * open of a region after receiving an OPEN RPC from the Master.
584 *
585 * @param zkw zk reference
586 * @param region region to be transitioned to opening
587 * @param serverName server transition happens on
588 * @param updateZNode write the znode. If false, we only check.
589 * @return version of node after transition, -1 if unsuccessful transition
590 * @throws KeeperException if unexpected zookeeper exception
591 */
592 public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
593 HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode)
594 throws KeeperException {
595
596 String encoded = region.getEncodedName();
597 if(LOG.isDebugEnabled()) {
598 LOG.debug(zkw.prefix("Attempting to retransition the opening state of node " +
599 HRegionInfo.prettyPrint(encoded)));
600 }
601
602 String node = getNodeName(zkw, encoded);
603 zkw.sync(node);
604
605 // Read existing data of the node
606 Stat stat = new Stat();
607 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
608 if (existingBytes == null) {
609 // Node no longer exists. Return -1. It means unsuccessful transition.
610 return -1;
611 }
612 RegionTransition rt = getRegionTransition(existingBytes);
613
614 // Verify it is the expected version
615 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
616 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
617 "unassigned node for " + encoded + " failed, " +
618 "the node existed but was version " + stat.getVersion() +
619 " not the expected version " + expectedVersion));
620 return -1;
621 }
622
623 // Verify it is in expected state
624 EventType et = rt.getEventType();
625 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
626 String existingServer = (rt.getServerName() == null)
627 ? "<unknown>" : rt.getServerName().toString();
628 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
629 + encoded + " failed, the node existed but was in the state " + et +
630 " set by the server " + existingServer));
631 return -1;
632 }
633
634 // We don't have to write the new state: the check is complete.
635 if (!updateZNode){
636 return expectedVersion;
637 }
638
639 // Write new data, ensuring data has not changed since we last read it
640 try {
641 rt = RegionTransition.createRegionTransition(
642 EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null);
643 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
644 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
645 "unassigned node for " + encoded + " failed, " +
646 "the node existed and was in the expected state but then when " +
647 "setting data we got a version mismatch"));
648 return -1;
649 }
650 if(LOG.isDebugEnabled()) {
651 LOG.debug(zkw.prefix("Successfully retransition the opening state of node " + encoded));
652 }
653 return stat.getVersion() + 1;
654 } catch (KeeperException.NoNodeException nne) {
655 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
656 "unassigned node for " + encoded + " failed, " +
657 "the node existed and was in the expected state but then when " +
658 "setting data it no longer existed"));
659 return -1;
660 }
661 }
662
663 /**
664 * Transitions an existing unassigned node for the specified region which is
665 * currently in the OPENING state to be in the OPENED state.
666 *
667 * <p>Does not transition nodes from other states. If for some reason the
668 * node could not be transitioned, the method returns -1. If the transition
669 * is successful, the version of the node after transition is returned.
670 *
671 * <p>This method can fail and return false for three different reasons:
672 * <ul><li>Unassigned node for this region does not exist</li>
673 * <li>Unassigned node for this region is not in OPENING state</li>
674 * <li>After verifying OPENING state, update fails because of wrong version
675 * (this should never actually happen since an RS only does this transition
676 * following a transition to OPENING. if two RS are conflicting, one would
677 * fail the original transition to OPENING and not this transition)</li>
678 * </ul>
679 *
680 * <p>Does not set any watches.
681 *
682 * <p>This method should only be used by a RegionServer when completing the
683 * open of a region.
684 *
685 * @param zkw zk reference
686 * @param region region to be transitioned to opened
687 * @param serverName server transition happens on
688 * @return version of node after transition, -1 if unsuccessful transition
689 * @throws KeeperException if unexpected zookeeper exception
690 */
691 public static int transitionNodeOpened(ZooKeeperWatcher zkw,
692 HRegionInfo region, ServerName serverName, int expectedVersion)
693 throws KeeperException {
694 return transitionNode(zkw, region, serverName,
695 EventType.RS_ZK_REGION_OPENING,
696 EventType.RS_ZK_REGION_OPENED, expectedVersion);
697 }
698
699 /**
700 *
701 * @param zkw zk reference
702 * @param region region to be closed
703 * @param expectedVersion expected version of the znode
704 * @return true if the znode exists, has the right version and the right state. False otherwise.
705 * @throws KeeperException
706 */
707 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
708 int expectedVersion) throws KeeperException {
709
710 final String encoded = getNodeName(zkw, region.getEncodedName());
711 zkw.sync(encoded);
712
713 // Read existing data of the node
714 Stat stat = new Stat();
715 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
716
717 if (existingBytes == null) {
718 LOG.warn(zkw.prefix("Attempt to check the " +
719 "closing node for " + encoded +
720 ". The node does not exist"));
721 return false;
722 }
723
724 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
725 LOG.warn(zkw.prefix("Attempt to check the " +
726 "closing node for " + encoded +
727 ". The node existed but was version " + stat.getVersion() +
728 " not the expected version " + expectedVersion));
729 return false;
730 }
731
732 RegionTransition rt = getRegionTransition(existingBytes);
733
734 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
735 LOG.warn(zkw.prefix("Attempt to check the " +
736 "closing node for " + encoded +
737 ". The node existed but was in an unexpected state: " + rt.getEventType()));
738 return false;
739 }
740
741 return true;
742 }
743
744 /**
745 * Method that actually performs unassigned node transitions.
746 *
747 * <p>Attempts to transition the unassigned node for the specified region
748 * from the expected state to the state in the specified transition data.
749 *
750 * <p>Method first reads existing data and verifies it is in the expected
751 * state. If the node does not exist or the node is not in the expected
752 * state, the method returns -1. If the transition is successful, the
753 * version number of the node following the transition is returned.
754 *
755 * <p>If the read state is what is expected, it attempts to write the new
756 * state and data into the node. When doing this, it includes the expected
757 * version (determined when the existing state was verified) to ensure that
758 * only one transition is successful. If there is a version mismatch, the
759 * method returns -1.
760 *
761 * <p>If the write is successful, no watch is set and the method returns true.
762 *
763 * @param zkw zk reference
764 * @param region region to be transitioned to opened
765 * @param serverName server transition happens on
766 * @param endState state to transition node to if all checks pass
767 * @param beginState state the node must currently be in to do transition
768 * @param expectedVersion expected version of data before modification, or -1
769 * @return version of node after transition, -1 if unsuccessful transition
770 * @throws KeeperException if unexpected zookeeper exception
771 */
772 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
773 ServerName serverName, EventType beginState, EventType endState,
774 int expectedVersion)
775 throws KeeperException {
776 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
777 }
778
779
780 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
781 ServerName serverName, EventType beginState, EventType endState,
782 int expectedVersion, final byte [] payload)
783 throws KeeperException {
784 String encoded = region.getEncodedName();
785 if(LOG.isDebugEnabled()) {
786 LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
787 " from " + beginState.toString() + " to " + endState.toString()));
788 }
789
790 String node = getNodeName(zkw, encoded);
791 zkw.sync(node);
792
793 // Read existing data of the node
794 Stat stat = new Stat();
795 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
796 if (existingBytes == null) {
797 // Node no longer exists. Return -1. It means unsuccessful transition.
798 return -1;
799 }
800
801 // Verify it is the expected version
802 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
803 LOG.warn(zkw.prefix("Attempt to transition the " +
804 "unassigned node for " + encoded +
805 " from " + beginState + " to " + endState + " failed, " +
806 "the node existed but was version " + stat.getVersion() +
807 " not the expected version " + expectedVersion));
808 return -1;
809 }
810
811 if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
812 && endState.equals(EventType.RS_ZK_REGION_OPENING)
813 && expectedVersion == -1 && stat.getVersion() != 0) {
814 // the below check ensures that double assignment doesnot happen.
815 // When the node is created for the first time then the expected version
816 // that is passed will be -1 and the version in znode will be 0.
817 // In all other cases the version in znode will be > 0.
818 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
819 + encoded + " from " + beginState + " to " + endState + " failed, "
820 + "the node existed but was version " + stat.getVersion()
821 + " not the expected version " + expectedVersion));
822 return -1;
823 }
824
825 RegionTransition rt = getRegionTransition(existingBytes);
826
827 // Verify the server transition happens on is not changed
828 if (!rt.getServerName().equals(serverName)) {
829 LOG.warn(zkw.prefix("Attempt to transition the " +
830 "unassigned node for " + encoded +
831 " from " + beginState + " to " + endState + " failed, " +
832 "the server that tried to transition was " + serverName +
833 " not the expected " + rt.getServerName()));
834 return -1;
835 }
836
837 // Verify it is in expected state
838 EventType et = rt.getEventType();
839 if (!et.equals(beginState)) {
840 String existingServer = (rt.getServerName() == null)
841 ? "<unknown>" : rt.getServerName().toString();
842 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
843 + " from " + beginState + " to " + endState + " failed, the node existed but"
844 + " was in the state " + et + " set by the server " + existingServer));
845 return -1;
846 }
847
848 // Write new data, ensuring data has not changed since we last read it
849 try {
850 rt = RegionTransition.createRegionTransition(
851 endState, region.getRegionName(), serverName, payload);
852 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
853 LOG.warn(zkw.prefix("Attempt to transition the " +
854 "unassigned node for " + encoded +
855 " from " + beginState + " to " + endState + " failed, " +
856 "the node existed and was in the expected state but then when " +
857 "setting data we got a version mismatch"));
858 return -1;
859 }
860 if(LOG.isDebugEnabled()) {
861 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
862 " from " + beginState + " to " + endState));
863 }
864 return stat.getVersion() + 1;
865 } catch (KeeperException.NoNodeException nne) {
866 LOG.warn(zkw.prefix("Attempt to transition the " +
867 "unassigned node for " + encoded +
868 " from " + beginState + " to " + endState + " failed, " +
869 "the node existed and was in the expected state but then when " +
870 "setting data it no longer existed"));
871 return -1;
872 }
873 }
874
875 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
876 try {
877 return RegionTransition.parseFrom(bytes);
878 } catch (DeserializationException e) {
879 // Convert to a zk exception for now. Otherwise have to change API
880 throw ZKUtil.convert(e);
881 }
882 }
883
884 /**
885 * Gets the current data in the unassigned node for the specified region name
886 * or fully-qualified path.
887 *
888 * <p>Returns null if the region does not currently have a node.
889 *
890 * <p>Sets a watch on the node if the node exists.
891 *
892 * @param zkw zk reference
893 * @param pathOrRegionName fully-specified path or region name
894 * @return znode content
895 * @throws KeeperException if unexpected zookeeper exception
896 */
897 public static byte [] getData(ZooKeeperWatcher zkw,
898 String pathOrRegionName)
899 throws KeeperException {
900 String node = getPath(zkw, pathOrRegionName);
901 return ZKUtil.getDataAndWatch(zkw, node);
902 }
903
904 /**
905 * Gets the current data in the unassigned node for the specified region name
906 * or fully-qualified path.
907 *
908 * <p>Returns null if the region does not currently have a node.
909 *
910 * <p>Sets a watch on the node if the node exists.
911 *
912 * @param zkw zk reference
913 * @param pathOrRegionName fully-specified path or region name
914 * @param stat object to populate the version.
915 * @return znode content
916 * @throws KeeperException if unexpected zookeeper exception
917 */
918 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
919 String pathOrRegionName, Stat stat)
920 throws KeeperException {
921 String node = getPath(zkw, pathOrRegionName);
922 return ZKUtil.getDataAndWatch(zkw, node, stat);
923 }
924
925 /**
926 * Gets the current data in the unassigned node for the specified region name
927 * or fully-qualified path.
928 *
929 * <p>Returns null if the region does not currently have a node.
930 *
931 * <p>Does not set a watch.
932 *
933 * @param zkw zk reference
934 * @param pathOrRegionName fully-specified path or region name
935 * @param stat object to store node info into on getData call
936 * @return znode content
937 * @throws KeeperException if unexpected zookeeper exception
938 */
939 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
940 String pathOrRegionName, Stat stat)
941 throws KeeperException {
942 String node = getPath(zkw, pathOrRegionName);
943 return ZKUtil.getDataNoWatch(zkw, node, stat);
944 }
945
946 /**
947 * @param zkw
948 * @param pathOrRegionName
949 * @return Path to znode
950 */
951 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
952 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
953 }
954
955 /**
956 * Get the version of the specified znode
957 * @param zkw zk reference
958 * @param region region's info
959 * @return the version of the znode, -1 if it doesn't exist
960 * @throws KeeperException
961 */
962 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
963 throws KeeperException {
964 String znode = getNodeName(zkw, region.getEncodedName());
965 return ZKUtil.checkExists(zkw, znode);
966 }
967
968 /**
969 * Delete the assignment node regardless of its current state.
970 * <p>
971 * Fail silent even if the node does not exist at all.
972 * @param watcher
973 * @param regionInfo
974 * @throws KeeperException
975 */
976 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
977 HRegionInfo regionInfo)
978 throws KeeperException {
979 String node = getNodeName(watcher, regionInfo.getEncodedName());
980 ZKUtil.deleteNodeFailSilent(watcher, node);
981 }
982
983 /**
984 * Blocks until there are no node in regions in transition.
985 * <p>
986 * Used in testing only.
987 * @param zkw zk reference
988 * @throws KeeperException
989 * @throws InterruptedException
990 */
991 public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
992 throws KeeperException, InterruptedException {
993 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
994 List<String> znodes =
995 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
996 if (znodes != null && !znodes.isEmpty()) {
997 LOG.debug("Waiting on RIT: " + znodes);
998 }
999 Thread.sleep(100);
1000 }
1001 }
1002
1003 /**
1004 * Blocks until there is at least one node in regions in transition.
1005 * <p>
1006 * Used in testing only.
1007 * @param zkw zk reference
1008 * @throws KeeperException
1009 * @throws InterruptedException
1010 */
1011 public static void blockUntilRIT(ZooKeeperWatcher zkw)
1012 throws KeeperException, InterruptedException {
1013 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1014 List<String> znodes =
1015 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1016 if (znodes == null || znodes.isEmpty()) {
1017 LOG.debug("No RIT in ZK");
1018 }
1019 Thread.sleep(100);
1020 }
1021 }
1022
1023 /**
1024 * Presume bytes are serialized unassigned data structure
1025 * @param znodeBytes
1026 * @return String of the deserialized znode bytes.
1027 */
1028 static String toString(final byte[] znodeBytes) {
1029 // This method should not exist. Used by ZKUtil stringifying RegionTransition. Have the
1030 // method in here so RegionTransition does not leak into ZKUtil.
1031 try {
1032 RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1033 return rt.toString();
1034 } catch (DeserializationException e) {
1035 return "";
1036 }
1037 }
1038 }