1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Comparator;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.NavigableSet;
28 import java.util.Random;
29 import java.util.TreeMap;
30 import java.util.TreeSet;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.fs.BlockLocation;
35 import org.apache.hadoop.fs.FileStatus;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.HRegionInfo;
39 import org.apache.hadoop.hbase.HServerAddress;
40 import org.apache.hadoop.hbase.HServerInfo;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58 public class LoadBalancer {
59 private static final Log LOG = LogFactory.getLog(LoadBalancer.class);
60 private static final Random rand = new Random();
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130 public List<RegionPlan> balanceCluster(
131 Map<HServerInfo,List<HRegionInfo>> clusterState) {
132 long startTime = System.currentTimeMillis();
133
134
135 TreeMap<HServerInfo,List<HRegionInfo>> serversByLoad =
136 new TreeMap<HServerInfo,List<HRegionInfo>>(
137 new HServerInfo.LoadComparator());
138 int numServers = clusterState.size();
139 if (numServers == 0) {
140 LOG.debug("numServers=0 so skipping load balancing");
141 return null;
142 }
143 int numRegions = 0;
144
145 for(Map.Entry<HServerInfo, List<HRegionInfo>> server:
146 clusterState.entrySet()) {
147 server.getKey().getLoad().setNumberOfRegions(server.getValue().size());
148 numRegions += server.getKey().getLoad().getNumberOfRegions();
149 serversByLoad.put(server.getKey(), server.getValue());
150 }
151
152
153 float average = (float)numRegions / numServers;
154 int min = numRegions / numServers;
155 int max = numRegions % numServers == 0 ? min : min + 1;
156 if(serversByLoad.lastKey().getLoad().getNumberOfRegions() <= max &&
157 serversByLoad.firstKey().getLoad().getNumberOfRegions() >= min) {
158
159 LOG.info("Skipping load balancing. servers=" + numServers + " " +
160 "regions=" + numRegions + " average=" + average + " " +
161 "mostloaded=" + serversByLoad.lastKey().getLoad().getNumberOfRegions() +
162 " leastloaded=" + serversByLoad.lastKey().getLoad().getNumberOfRegions());
163 return null;
164 }
165
166
167
168 List<RegionPlan> regionsToMove = new ArrayList<RegionPlan>();
169 int regionidx = 0;
170
171
172 int serversOverloaded = 0;
173 Map<HServerInfo,BalanceInfo> serverBalanceInfo =
174 new TreeMap<HServerInfo,BalanceInfo>();
175 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
176 serversByLoad.descendingMap().entrySet()) {
177 HServerInfo serverInfo = server.getKey();
178 int regionCount = serverInfo.getLoad().getNumberOfRegions();
179 if(regionCount <= max) {
180 serverBalanceInfo.put(serverInfo, new BalanceInfo(0, 0));
181 break;
182 }
183 serversOverloaded++;
184 List<HRegionInfo> regions = server.getValue();
185 int numToOffload = Math.min(regionCount - max, regions.size());
186 int numTaken = 0;
187 for (HRegionInfo hri: regions) {
188
189 if (hri.isMetaRegion()) continue;
190 regionsToMove.add(new RegionPlan(hri, serverInfo, null));
191 numTaken++;
192 if (numTaken >= numToOffload) break;
193 }
194 serverBalanceInfo.put(serverInfo,
195 new BalanceInfo(numToOffload, (-1)*numTaken));
196 }
197
198
199 int serversUnderloaded = 0;
200 int neededRegions = 0;
201 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
202 serversByLoad.entrySet()) {
203 int regionCount = server.getKey().getLoad().getNumberOfRegions();
204 if(regionCount >= min) {
205 break;
206 }
207 serversUnderloaded++;
208 int numToTake = min - regionCount;
209 int numTaken = 0;
210 while(numTaken < numToTake && regionidx < regionsToMove.size()) {
211 regionsToMove.get(regionidx).setDestination(server.getKey());
212 numTaken++;
213 regionidx++;
214 }
215 serverBalanceInfo.put(server.getKey(), new BalanceInfo(0, numTaken));
216
217 if(numTaken < numToTake) {
218 neededRegions += (numToTake - numTaken);
219 }
220 }
221
222
223
224 if(neededRegions == 0 && regionidx == regionsToMove.size()) {
225 long endTime = System.currentTimeMillis();
226 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
227 "Moving " + regionsToMove.size() + " regions off of " +
228 serversOverloaded + " overloaded servers onto " +
229 serversUnderloaded + " less loaded servers");
230 return regionsToMove;
231 }
232
233
234
235
236
237 if (neededRegions != 0) {
238
239 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
240 serversByLoad.descendingMap().entrySet()) {
241 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey());
242 int idx =
243 balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
244 if (idx >= server.getValue().size()) break;
245 HRegionInfo region = server.getValue().get(idx);
246 if (region.isMetaRegion()) continue;
247 regionsToMove.add(new RegionPlan(region, server.getKey(), null));
248 if(--neededRegions == 0) {
249
250 break;
251 }
252 }
253 }
254
255
256
257
258
259 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
260 serversByLoad.entrySet()) {
261 int regionCount = server.getKey().getLoad().getNumberOfRegions();
262 if (regionCount >= min) break;
263 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey());
264 if(balanceInfo != null) {
265 regionCount += balanceInfo.getNumRegionsAdded();
266 }
267 if(regionCount >= min) {
268 continue;
269 }
270 int numToTake = min - regionCount;
271 int numTaken = 0;
272 while(numTaken < numToTake && regionidx < regionsToMove.size()) {
273 regionsToMove.get(regionidx).setDestination(server.getKey());
274 numTaken++;
275 regionidx++;
276 }
277 }
278
279
280 if(regionidx != regionsToMove.size()) {
281 for(Map.Entry<HServerInfo, List<HRegionInfo>> server :
282 serversByLoad.entrySet()) {
283 int regionCount = server.getKey().getLoad().getNumberOfRegions();
284 if(regionCount >= max) {
285 break;
286 }
287 regionsToMove.get(regionidx).setDestination(server.getKey());
288 regionidx++;
289 if(regionidx == regionsToMove.size()) {
290 break;
291 }
292 }
293 }
294
295 long endTime = System.currentTimeMillis();
296
297 if (regionidx != regionsToMove.size() || neededRegions != 0) {
298
299 LOG.warn("regionidx=" + regionidx + ", regionsToMove=" + regionsToMove.size() +
300 ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded +
301 ", serversUnderloaded=" + serversUnderloaded);
302 StringBuilder sb = new StringBuilder();
303 for (Map.Entry<HServerInfo, List<HRegionInfo>> e: clusterState.entrySet()) {
304 if (sb.length() > 0) sb.append(", ");
305 sb.append(e.getKey().getServerName());
306 sb.append(" ");
307 sb.append(e.getValue().size());
308 }
309 LOG.warn("Input " + sb.toString());
310 }
311
312
313 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
314 "Moving " + regionsToMove.size() + " regions off of " +
315 serversOverloaded + " overloaded servers onto " +
316 serversUnderloaded + " less loaded servers");
317
318 return regionsToMove;
319 }
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334 private static class BalanceInfo {
335
336 private final int nextRegionForUnload;
337 private final int numRegionsAdded;
338
339 public BalanceInfo(int nextRegionForUnload, int numRegionsAdded) {
340 this.nextRegionForUnload = nextRegionForUnload;
341 this.numRegionsAdded = numRegionsAdded;
342 }
343
344 public int getNextRegionForUnload() {
345 return nextRegionForUnload;
346 }
347
348 public int getNumRegionsAdded() {
349 return numRegionsAdded;
350 }
351 }
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370 public static Map<HServerInfo,List<HRegionInfo>> roundRobinAssignment(
371 List<HRegionInfo> regions, List<HServerInfo> servers) {
372 if(regions.size() == 0 || servers.size() == 0) {
373 return null;
374 }
375 Map<HServerInfo,List<HRegionInfo>> assignments =
376 new TreeMap<HServerInfo,List<HRegionInfo>>();
377 int numRegions = regions.size();
378 int numServers = servers.size();
379 int max = (int)Math.ceil((float)numRegions/numServers);
380 int serverIdx = 0;
381 if (numServers > 1) {
382 serverIdx = rand.nextInt(numServers);
383 }
384 int regionIdx = 0;
385 for (int j = 0; j < numServers; j++) {
386 HServerInfo server = servers.get((j+serverIdx) % numServers);
387 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
388 for (int i=regionIdx; i<numRegions; i += numServers) {
389 serverRegions.add(regions.get(i % numRegions));
390 }
391 assignments.put(server, serverRegions);
392 regionIdx++;
393 }
394 return assignments;
395 }
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413 public static Map<HServerInfo, List<HRegionInfo>> retainAssignment(
414 Map<HRegionInfo, HServerAddress> regions, List<HServerInfo> servers) {
415 Map<HServerInfo, List<HRegionInfo>> assignments =
416 new TreeMap<HServerInfo, List<HRegionInfo>>();
417
418 Map<HServerAddress, HServerInfo> serverMap =
419 new TreeMap<HServerAddress, HServerInfo>();
420 for (HServerInfo server : servers) {
421 serverMap.put(server.getServerAddress(), server);
422 assignments.put(server, new ArrayList<HRegionInfo>());
423 }
424 for (Map.Entry<HRegionInfo, HServerAddress> region : regions.entrySet()) {
425 HServerAddress hsa = region.getValue();
426 HServerInfo server = hsa == null? null: serverMap.get(hsa);
427 if (server != null) {
428 assignments.get(server).add(region.getKey());
429 } else {
430 assignments.get(servers.get(rand.nextInt(assignments.size()))).add(
431 region.getKey());
432 }
433 }
434 return assignments;
435 }
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455 @SuppressWarnings("unused")
456 private List<String> getTopBlockLocations(FileSystem fs, HRegionInfo region)
457 throws IOException {
458 String encodedName = region.getEncodedName();
459 Path path = new Path("/hbase/table/" + encodedName);
460 FileStatus status = fs.getFileStatus(path);
461 BlockLocation [] blockLocations =
462 fs.getFileBlockLocations(status, 0, status.getLen());
463 Map<HostAndWeight,HostAndWeight> hostWeights =
464 new TreeMap<HostAndWeight,HostAndWeight>(new HostAndWeight.HostComparator());
465 for(BlockLocation bl : blockLocations) {
466 String [] hosts = bl.getHosts();
467 long len = bl.getLength();
468 for(String host : hosts) {
469 HostAndWeight haw = hostWeights.get(host);
470 if(haw == null) {
471 haw = new HostAndWeight(host, len);
472 hostWeights.put(haw, haw);
473 } else {
474 haw.addWeight(len);
475 }
476 }
477 }
478 NavigableSet<HostAndWeight> orderedHosts = new TreeSet<HostAndWeight>(
479 new HostAndWeight.WeightComparator());
480 orderedHosts.addAll(hostWeights.values());
481 List<String> topHosts = new ArrayList<String>(orderedHosts.size());
482 for(HostAndWeight haw : orderedHosts.descendingSet()) {
483 topHosts.add(haw.getHost());
484 }
485 return topHosts;
486 }
487
488
489
490
491
492
493
494
495
496
497
498 private static class HostAndWeight {
499
500 private final String host;
501 private long weight;
502
503 public HostAndWeight(String host, long weight) {
504 this.host = host;
505 this.weight = weight;
506 }
507
508 public void addWeight(long weight) {
509 this.weight += weight;
510 }
511
512 public String getHost() {
513 return host;
514 }
515
516 public long getWeight() {
517 return weight;
518 }
519
520 private static class HostComparator implements Comparator<HostAndWeight> {
521 @Override
522 public int compare(HostAndWeight l, HostAndWeight r) {
523 return l.getHost().compareTo(r.getHost());
524 }
525 }
526
527 private static class WeightComparator implements Comparator<HostAndWeight> {
528 @Override
529 public int compare(HostAndWeight l, HostAndWeight r) {
530 if(l.getWeight() == r.getWeight()) {
531 return l.getHost().compareTo(r.getHost());
532 }
533 return l.getWeight() < r.getWeight() ? -1 : 1;
534 }
535 }
536 }
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556 public static Map<HRegionInfo,HServerInfo> immediateAssignment(
557 List<HRegionInfo> regions, List<HServerInfo> servers) {
558 Map<HRegionInfo,HServerInfo> assignments =
559 new TreeMap<HRegionInfo,HServerInfo>();
560 for(HRegionInfo region : regions) {
561 assignments.put(region, servers.get(rand.nextInt(servers.size())));
562 }
563 return assignments;
564 }
565
566 public static HServerInfo randomAssignment(List<HServerInfo> servers) {
567 if (servers == null || servers.isEmpty()) {
568 LOG.warn("Wanted to do random assignment but no servers to assign to");
569 return null;
570 }
571 return servers.get(rand.nextInt(servers.size()));
572 }
573
574
575
576
577
578
579
580
581
582
583
584 public static class RegionPlan implements Comparable<RegionPlan> {
585 private final HRegionInfo hri;
586 private final HServerInfo source;
587 private HServerInfo dest;
588
589
590
591
592
593
594
595
596
597
598
599
600 public RegionPlan(final HRegionInfo hri, HServerInfo source, HServerInfo dest) {
601 this.hri = hri;
602 this.source = source;
603 this.dest = dest;
604 }
605
606
607
608
609 public void setDestination(HServerInfo dest) {
610 this.dest = dest;
611 }
612
613
614
615
616
617 public HServerInfo getSource() {
618 return source;
619 }
620
621
622
623
624
625 public HServerInfo getDestination() {
626 return dest;
627 }
628
629
630
631
632
633 public String getRegionName() {
634 return this.hri.getEncodedName();
635 }
636
637 public HRegionInfo getRegionInfo() {
638 return this.hri;
639 }
640
641
642
643
644
645 @Override
646 public int compareTo(RegionPlan o) {
647 return getRegionName().compareTo(o.getRegionName());
648 }
649
650 @Override
651 public String toString() {
652 return "hri=" + this.hri.getRegionNameAsString() + ", src=" +
653 (this.source == null? "": this.source.getServerName()) +
654 ", dest=" + (this.dest == null? "": this.dest.getServerName());
655 }
656 }
657 }