/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import static org.apache.hadoop.hbase.util.FSUtils.recoverFileLease;

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Entry;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Writer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.MultipleIOException;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
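/**
 * This class is responsible for splitting up a bunch of regionserver commit log
 * files that are no longer being written to, into new files, one per region for
 * the region to replay on startup. Deletes the old log files when finished.
 * <p>
 * A minimal usage sketch (obtaining the {@link Configuration},
 * {@link FileSystem} and directory layout is assumed, not shown here):
 * <pre>{@code
 * HLogSplitter splitter = HLogSplitter.createLogSplitter(
 *     conf, rootDir, logDir, oldLogDir, fs);
 * List<Path> recovered = splitter.splitLog(); // an instance may be used once
 * // recovered is null if logDir was missing or empty
 * }</pre>
 */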
public class HLogSplitter {

  private static final String LOG_SPLITTER_IMPL = "hbase.hlog.splitter.impl";

  /**
   * Name of the directory, under a region directory, into which split-out
   * log edits are written, one file per split log
   */
  public static final String RECOVERED_EDITS = "recovered.edits";

  static final Log LOG = LogFactory.getLog(HLogSplitter.class);

  private boolean hasSplit = false;
  private long splitTime = 0;
  private long splitSize = 0;

  // Parameters for the split process
  protected final Path rootDir;
  protected final Path srcDir;
  protected final Path oldLogDir;
  protected final FileSystem fs;
  protected final Configuration conf;

  // Major subcomponents of the split process.
  // These are separated into inner classes to make testing easier.
  OutputSink outputSink;
  EntryBuffers entryBuffers;

  // If an exception is thrown by one of the writer threads, this exception is
  // stored by the first thread that encounters it, and rethrown later by
  // checkForErrors()
  protected AtomicReference<Throwable> thrown = new AtomicReference<Throwable>();

  // Wait/notify monitor for coordinating producers (log readers) and
  // consumers (writer threads) of buffered edits
  Object dataAvailable = new Object();

  /**
   * Create a new HLogSplitter, using the implementation class named by the
   * {@code hbase.hlog.splitter.impl} configuration property (defaulting to
   * HLogSplitter itself). The implementation must provide a public
   * constructor with this method's parameter signature.
   *
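   * A hypothetical custom implementation could be plugged in like this
   * (a sketch; {@code MyHLogSplitter} is not part of this source):
   * <pre>{@code
   * conf.setClass("hbase.hlog.splitter.impl", MyHLogSplitter.class,
   *     HLogSplitter.class);
   * HLogSplitter splitter = HLogSplitter.createLogSplitter(
   *     conf, rootDir, srcDir, oldLogDir, fs);
   * }</pre>
   *
   * @param conf configuration to use
   * @param rootDir hbase root directory
   * @param srcDir directory of logs to split
   * @param oldLogDir directory into which processed logs are archived
   * @param fs FileSystem to use
   */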
  public static HLogSplitter createLogSplitter(Configuration conf,
      final Path rootDir, final Path srcDir,
      Path oldLogDir, final FileSystem fs) {

    @SuppressWarnings("unchecked")
    Class<? extends HLogSplitter> splitterClass = (Class<? extends HLogSplitter>) conf
        .getClass(LOG_SPLITTER_IMPL, HLogSplitter.class);
    try {
      Constructor<? extends HLogSplitter> constructor =
        splitterClass.getConstructor(
          Configuration.class, // conf
          Path.class, // rootDir
          Path.class, // srcDir
          Path.class, // oldLogDir
          FileSystem.class); // fs
      return constructor.newInstance(conf, rootDir, srcDir, oldLogDir, fs);
    } catch (IllegalArgumentException e) {
      throw new RuntimeException(e);
    } catch (InstantiationException e) {
      throw new RuntimeException(e);
    } catch (IllegalAccessException e) {
      throw new RuntimeException(e);
    } catch (InvocationTargetException e) {
      throw new RuntimeException(e);
    } catch (SecurityException e) {
      throw new RuntimeException(e);
    } catch (NoSuchMethodException e) {
      throw new RuntimeException(e);
    }
  }

  public HLogSplitter(Configuration conf, Path rootDir, Path srcDir,
      Path oldLogDir, FileSystem fs) {
    this.conf = conf;
    this.rootDir = rootDir;
    this.srcDir = srcDir;
    this.oldLogDir = oldLogDir;
    this.fs = fs;

    entryBuffers = new EntryBuffers(
        conf.getInt("hbase.regionserver.hlog.splitlog.buffersize",
            128 * 1024 * 1024));
    outputSink = new OutputSink();
  }

  /**
   * Split up a bunch of regionserver commit log files that are no longer being
   * written to, into new files, one per region for the region to replay on
   * startup. Deletes the old log files when finished.
   *
   * @throws IOException will throw if corrupted hlogs aren't tolerated
   * @return the list of splits, or null if there were no logs to split
   */
  public List<Path> splitLog() throws IOException {
    Preconditions.checkState(!hasSplit,
        "An HLogSplitter instance may only be used once");
    hasSplit = true;

    long startTime = System.currentTimeMillis();
    List<Path> splits = null;
    if (!fs.exists(srcDir)) {
      // Nothing to do
      return splits;
    }
    FileStatus[] logfiles = fs.listStatus(srcDir);
    if (logfiles == null || logfiles.length == 0) {
      // Nothing to do
      return splits;
    }
    LOG.info("Splitting " + logfiles.length + " hlog(s) in "
        + srcDir.toString());
    splits = splitLog(logfiles);

    splitTime = System.currentTimeMillis() - startTime;
    LOG.info("hlog file splitting completed in " + splitTime +
        " ms for " + srcDir.toString());
    return splits;
  }

  /**
   * @return time elapsed, in milliseconds, for the last call to splitLog()
   */
  public long getTime() {
    return this.splitTime;
  }

  /**
   * @return aggregate size, in bytes, of the hlogs that were split
   */
  public long getSize() {
    return this.splitSize;
  }

  /**
   * @return a map from encoded region ID to the number of edits written out
   * for that region
   */
  Map<byte[], Long> getOutputCounts() {
    Preconditions.checkState(hasSplit);
    return outputSink.getOutputCounts();
  }

  /**
   * Sorts the HLog edits in the given list of logfiles (that are a mix of
   * edits on multiple regions) by region, and then splits them per region
   * directory.
   *
   * The main thread loops over the logs to split. For each log it:
   *   - recovers the HDFS file lease, so no other process can append to it
   *   - reads every edit and appends it to the per-region EntryBuffers
   *   - marks the log as processed, or as corrupted if parsing failed
   *
   * In parallel, the OutputSink's writer threads drain the buffered edits
   * into one recovered.edits file per region. Once every log has been read,
   * processed logs are archived and corrupted ones are moved aside.
   *
   * @return the list of split edit files written
   */
  private List<Path> splitLog(final FileStatus[] logfiles) throws IOException {
    List<Path> processedLogs = new ArrayList<Path>();
    List<Path> corruptedLogs = new ArrayList<Path>();
    List<Path> splits = null;

    boolean skipErrors = conf.getBoolean("hbase.hlog.split.skip.errors", false);

    splitSize = 0;

    outputSink.startWriterThreads(entryBuffers);

    try {
      int i = 0;
      for (FileStatus log : logfiles) {
        Path logPath = log.getPath();
        long logLength = log.getLen();
        splitSize += logLength;
        LOG.debug("Splitting hlog " + (i++ + 1) + " of " + logfiles.length
            + ": " + logPath + ", length=" + logLength);
        try {
          recoverFileLease(fs, logPath, conf);
          parseHLog(log, entryBuffers, fs, conf);
          processedLogs.add(logPath);
        } catch (EOFException eof) {
          // truncated files are expected if a RS crashes (see HBASE-2643)
          LOG.info("EOF from hlog " + logPath + ". Continuing");
          processedLogs.add(logPath);
        } catch (FileNotFoundException fnfe) {
          // A log file may be missing if the region server managed to archive
          // it before dying; in that case its edits are already persisted
          LOG.info("A log was missing " + logPath +
              ", probably because it was moved by the" +
              " now dead region server. Continuing");
          processedLogs.add(logPath);
        } catch (IOException e) {
          // If the IOE resulted from a bad file format, the problem is
          // idempotent and retrying won't help
          if (e.getCause() instanceof ParseException) {
            LOG.warn("ParseException from hlog " + logPath + ". Continuing");
            processedLogs.add(logPath);
          } else {
            if (skipErrors) {
              LOG.info("Got error while parsing hlog " + logPath +
                  ". Marking as corrupted", e);
              corruptedLogs.add(logPath);
            } else {
              throw e;
            }
          }
        }
      }
      if (fs.listStatus(srcDir).length > processedLogs.size()
          + corruptedLogs.size()) {
        throw new OrphanHLogAfterSplitException(
            "Discovered orphan hlog after split. Maybe the "
            + "HRegionServer was not dead when we started");
      }
      archiveLogs(srcDir, corruptedLogs, processedLogs, oldLogDir, fs, conf);
    } finally {
      splits = outputSink.finishWritingAndClose();
    }
    return splits;
  }

  /**
   * Moves processed logs to the oldLogDir after successful processing. Moves
   * corrupted logs (any log that couldn't be successfully parsed) to the
   * corrupt dir (.corrupt) for later investigation.
   */
  private static void archiveLogs(
      final Path srcDir,
      final List<Path> corruptedLogs,
      final List<Path> processedLogs, final Path oldLogDir,
      final FileSystem fs, final Configuration conf) throws IOException {
    final Path corruptDir = new Path(conf.get(HConstants.HBASE_DIR), conf.get(
        "hbase.regionserver.hlog.splitlog.corrupt.dir", ".corrupt"));

    if (!fs.mkdirs(corruptDir)) {
      LOG.info("Unable to mkdir " + corruptDir);
    }
    fs.mkdirs(oldLogDir);

    for (Path corrupted : corruptedLogs) {
      Path p = new Path(corruptDir, corrupted.getName());
      if (!fs.rename(corrupted, p)) {
        LOG.info("Unable to move corrupted log " + corrupted + " to " + p);
      } else {
        LOG.info("Moving corrupted log " + corrupted + " to " + p);
      }
    }

    for (Path p : processedLogs) {
      Path newPath = HLog.getHLogArchivePath(oldLogDir, p);
      if (!fs.rename(p, newPath)) {
        LOG.info("Unable to move " + p + " to " + newPath);
      } else {
        LOG.info("Archived processed log " + p + " to " + newPath);
      }
    }

    if (!fs.delete(srcDir, true)) {
      throw new IOException("Unable to delete src dir: " + srcDir);
    }
  }

  /**
   * Path to a file under the RECOVERED_EDITS directory of the region found in
   * logEntry, named for the sequence id of the entry: e.g.
   * /hbase/some_table/2323432434/recovered.edits/0000000000000002332.
   * This method also ensures the RECOVERED_EDITS directory exists under the
   * region, creating it if necessary.
   *
   * @param fs the filesystem
   * @param logEntry the entry whose region's edits path is wanted
   * @param rootDir HBase root dir
   * @return path to the file into which to dump split log edits, or null if
   * the region directory no longer exists
   */
  static Path getRegionSplitEditsPath(final FileSystem fs,
      final Entry logEntry, final Path rootDir) throws IOException {
    Path tableDir = HTableDescriptor.getTableDir(rootDir, logEntry.getKey()
        .getTablename());
    Path regiondir = HRegion.getRegionDir(tableDir,
        Bytes.toString(logEntry.getKey().getEncodedRegionName()));
    if (!fs.exists(regiondir)) {
      LOG.info("This region's directory doesn't exist: "
          + regiondir.toString() + ". It is very likely that it was" +
          " already split so it's safe to discard those edits.");
      return null;
    }
    Path dir = HLog.getRegionDirRecoveredEditsDir(regiondir);
    if (!fs.exists(dir)) {
      if (!fs.mkdirs(dir)) LOG.warn("mkdir failed on " + dir);
    }
    return new Path(dir, formatRecoveredEditsFileName(logEntry.getKey()
        .getLogSeqNum()));
  }

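  /**
   * @param seqid a log sequence number
   * @return the sequence id formatted as a zero-padded, 19-digit file name.
   * A minimal illustration (a sketch, not part of the original source):
   * <pre>{@code
   * // 19 digits is wide enough for any positive long, so names sort correctly
   * assert formatRecoveredEditsFileName(2332L).equals("0000000000000002332");
   * }</pre>
   */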
  static String formatRecoveredEditsFileName(final long seqid) {
    return String.format("%019d", seqid);
  }

  /**
   * Parse a single hlog and push each edit into the given EntryBuffers.
   *
   * @param logfile the log file to split
   * @throws IOException if the hlog is corrupted, or cannot be opened
   */
  private void parseHLog(final FileStatus logfile,
      EntryBuffers entryBuffers, final FileSystem fs,
      final Configuration conf)
  throws IOException {
    // Check for a possibly empty file. With appends, currently Hadoop reports
    // a zero length even if the file has been sync'd. Revisit if HDFS-376 or
    // HDFS-878 is committed.
    long length = logfile.getLen();
    if (length <= 0) {
      LOG.warn("File " + logfile.getPath() + " might be still open, length is 0");
    }
    Path path = logfile.getPath();
    Reader in;
    int editsCount = 0;
    try {
      in = getReader(fs, path, conf);
    } catch (EOFException e) {
      if (length <= 0) {
        // TODO should we ignore an empty, not-last log file if skip.errors
        // is false? Either way, the caller should decide what to do, e.g.
        // ignore it if this is the last log in the sequence.
        LOG.warn("Could not open " + path + " for reading. File is empty: " + e);
        return;
      } else {
        throw e;
      }
    }
    try {
      Entry entry;
      while ((entry = in.next()) != null) {
        entryBuffers.appendEntry(entry);
        editsCount++;
      }
    } catch (InterruptedException ie) {
      throw new RuntimeException(ie);
    } finally {
      LOG.debug("Pushed=" + editsCount + " entries from " + path);
      try {
        if (in != null) {
          in.close();
        }
      } catch (IOException e) {
        LOG.warn("Close log reader in finally threw exception -- continuing",
            e);
      }
    }
  }

  private void writerThreadError(Throwable t) {
    thrown.compareAndSet(null, t);
  }

  /**
   * Check for errors in the writer threads. If any is found, rethrow it.
   */
  private void checkForErrors() throws IOException {
    Throwable thrown = this.thrown.get();
    if (thrown == null) return;
    if (thrown instanceof IOException) {
      throw (IOException) thrown;
    } else {
      throw new RuntimeException(thrown);
    }
  }

  /**
   * Create a new {@link Writer} for writing log splits.
   */
  protected Writer createWriter(FileSystem fs, Path logfile, Configuration conf)
      throws IOException {
    return HLog.createWriter(fs, logfile, conf);
  }

  /**
   * Create a new {@link Reader} for reading logs to split.
   */
  protected Reader getReader(FileSystem fs, Path curLogFile, Configuration conf)
      throws IOException {
    return HLog.getReader(fs, curLogFile, conf);
  }

  /**
   * Class which accumulates edits and separates them into a buffer per region,
   * while simultaneously accounting for their heap usage. If the total heap
   * usage crosses the configured threshold, appends block until the writer
   * threads have drained some of the buffered edits.
   */
  class EntryBuffers {
    Map<byte[], RegionEntryBuffer> buffers =
      new TreeMap<byte[], RegionEntryBuffer>(Bytes.BYTES_COMPARATOR);

    /* Track which regions are currently in the middle of writing. We don't
       allow an IO thread to pick up bytes from a region if we're already
       writing data for that region in a different IO thread. */
    Set<byte[]> currentlyWriting = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);

    long totalBuffered = 0;
    long maxHeapUsage;

    EntryBuffers(long maxHeapUsage) {
      this.maxHeapUsage = maxHeapUsage;
    }

    /**
     * Append a log entry into the corresponding region buffer.
     * Blocks if the total heap usage has crossed the configured threshold.
     *
     * @throws InterruptedException
     * @throws IOException
     */
    void appendEntry(Entry entry) throws InterruptedException, IOException {
      HLogKey key = entry.getKey();

      RegionEntryBuffer buffer;
      long incrHeap;
      synchronized (this) {
        buffer = buffers.get(key.getEncodedRegionName());
        if (buffer == null) {
          buffer = new RegionEntryBuffer(key.getTablename(), key.getEncodedRegionName());
          buffers.put(key.getEncodedRegionName(), buffer);
        }
        incrHeap = buffer.appendEntry(entry);
      }

      // If we crossed the heap threshold, wait for the IO threads to drain.
      // totalBuffered is guarded by the dataAvailable monitor.
      synchronized (dataAvailable) {
        totalBuffered += incrHeap;
        while (totalBuffered > maxHeapUsage && thrown.get() == null) {
          LOG.debug("Used " + totalBuffered + " bytes of buffered edits, waiting for IO threads...");
          dataAvailable.wait(3000);
        }
        dataAvailable.notifyAll();
      }
      checkForErrors();
    }

    /**
     * @return the buffer holding the most buffered edits that is not
     * currently being written, or null if no such buffer is available
     */
    synchronized RegionEntryBuffer getChunkToWrite() {
      long biggestSize = 0;
      byte[] biggestBufferKey = null;

      for (Map.Entry<byte[], RegionEntryBuffer> entry : buffers.entrySet()) {
        long size = entry.getValue().heapSize();
        if (size > biggestSize && !currentlyWriting.contains(entry.getKey())) {
          biggestSize = size;
          biggestBufferKey = entry.getKey();
        }
      }
      if (biggestBufferKey == null) {
        return null;
      }

      RegionEntryBuffer buffer = buffers.remove(biggestBufferKey);
      currentlyWriting.add(biggestBufferKey);
      return buffer;
    }

    void doneWriting(RegionEntryBuffer buffer) {
      synchronized (this) {
        boolean removed = currentlyWriting.remove(buffer.encodedRegionName);
        assert removed;
      }
      long size = buffer.heapSize();

      synchronized (dataAvailable) {
        totalBuffered -= size;
        // We may unblock writers waiting in appendEntry()
        dataAvailable.notifyAll();
      }
    }

    synchronized boolean isRegionCurrentlyWriting(byte[] region) {
      return currentlyWriting.contains(region);
    }
  }

  /**
   * A buffer of some number of edits for a given region.
   * This accumulates edits and also provides a memory optimization in order
   * to share a single byte array instance for the table and region name.
   * Also tracks memory usage of the accumulated edits.
   */
  static class RegionEntryBuffer implements HeapSize {
    long heapInBuffer = 0;
    List<Entry> entryBuffer;
    byte[] tableName;
    byte[] encodedRegionName;

    RegionEntryBuffer(byte[] table, byte[] region) {
      this.tableName = table;
      this.encodedRegionName = region;
      this.entryBuffer = new LinkedList<Entry>();
    }

    long appendEntry(Entry entry) {
      internify(entry);
      entryBuffer.add(entry);
      long incrHeap = entry.getEdit().heapSize() +
        ClassSize.align(2 * ClassSize.REFERENCE) + // HLogKey pointers
        0; // TODO linkedlist entry
      heapInBuffer += incrHeap;
      return incrHeap;
    }

    private void internify(Entry entry) {
      HLogKey k = entry.getKey();
      k.internTableName(this.tableName);
      k.internEncodedRegionName(this.encodedRegionName);
    }

    public long heapSize() {
      return heapInBuffer;
    }
  }

  class WriterThread extends Thread {
    private volatile boolean shouldStop = false;

    WriterThread(int i) {
      super("WriterThread-" + i);
    }

    public void run() {
      try {
        doRun();
      } catch (Throwable t) {
        LOG.error("Error in log splitting write thread", t);
        writerThreadError(t);
      }
    }

    private void doRun() throws IOException {
      LOG.debug("Writer thread " + this + ": starting");
      while (true) {
        RegionEntryBuffer buffer = entryBuffers.getChunkToWrite();
        if (buffer == null) {
          // No data currently available; wait for some more to show up
          synchronized (dataAvailable) {
            if (shouldStop) return;
            try {
              dataAvailable.wait(1000);
            } catch (InterruptedException ie) {
              if (!shouldStop) {
                throw new RuntimeException(ie);
              }
            }
          }
          continue;
        }

        try {
          writeBuffer(buffer);
        } finally {
          entryBuffers.doneWriting(buffer);
        }
      }
    }

    private void writeBuffer(RegionEntryBuffer buffer) throws IOException {
      List<Entry> entries = buffer.entryBuffer;
      if (entries.isEmpty()) {
        LOG.warn(this.getName() + " got an empty buffer, skipping");
        return;
      }

      WriterAndPath wap = null;

      long startTime = System.nanoTime();
      try {
        int editsCount = 0;

        for (Entry logEntry : entries) {
          if (wap == null) {
            wap = outputSink.getWriterAndPath(logEntry);
            if (wap == null) {
              // getWriterAndPath decided we don't need to write these edits
              // (e.g. the region directory no longer exists)
              return;
            }
          }
          wap.w.append(logEntry);
          editsCount++;
        }

        wap.incrementEdits(editsCount);
        wap.incrementNanoTime(System.nanoTime() - startTime);
      } catch (IOException e) {
        e = RemoteExceptionHandler.checkIOException(e);
        LOG.fatal(this.getName() + " Got error while writing log entry to log", e);
        throw e;
      }
    }

    void finish() {
      shouldStop = true;
    }
  }

  /**
   * Class that manages the output streams from the log splitting process.
   */
  class OutputSink {
    private final Map<byte[], WriterAndPath> logWriters = Collections.synchronizedMap(
        new TreeMap<byte[], WriterAndPath>(Bytes.BYTES_COMPARATOR));
    private final List<WriterThread> writerThreads = Lists.newArrayList();

    /* Set of regions which we've decided should not output any edits */
    private final Set<byte[]> blacklistedRegions = Collections.synchronizedSet(
        new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR));

    private boolean hasClosed = false;

    /**
     * Start the threads that will pump data from the entryBuffers
     * to the output files.
     */
    synchronized void startWriterThreads(EntryBuffers entryBuffers) {
      // The number of writer threads is configurable; each thread repeatedly
      // drains the largest available region buffer (see getChunkToWrite)
      int numThreads = conf.getInt(
          "hbase.regionserver.hlog.splitlog.writer.threads", 3);

      for (int i = 0; i < numThreads; i++) {
        WriterThread t = new WriterThread(i);
        t.start();
        writerThreads.add(t);
      }
    }

    List<Path> finishWritingAndClose() throws IOException {
      LOG.info("Waiting for split writer threads to finish");
      for (WriterThread t : writerThreads) {
        t.finish();
      }
      for (WriterThread t : writerThreads) {
        try {
          t.join();
        } catch (InterruptedException ie) {
          throw new IOException(ie);
        }
        checkForErrors();
      }
      LOG.info("Split writers finished");

      return closeStreams();
    }

    /**
     * Close all of the output streams.
     *
     * @return the list of paths written
     */
    private List<Path> closeStreams() throws IOException {
      Preconditions.checkState(!hasClosed);

      List<Path> paths = new ArrayList<Path>();
      List<IOException> thrown = Lists.newArrayList();

      for (WriterAndPath wap : logWriters.values()) {
        try {
          wap.w.close();
        } catch (IOException ioe) {
          LOG.error("Couldn't close log at " + wap.p, ioe);
          thrown.add(ioe);
          continue;
        }
        paths.add(wap.p);
        LOG.info("Closed path " + wap.p + " (wrote " + wap.editsWritten + " edits in "
            + (wap.nanosSpent / 1000 / 1000) + "ms)");
      }
      if (!thrown.isEmpty()) {
        throw MultipleIOException.createIOException(thrown);
      }

      hasClosed = true;
      return paths;
    }

    /**
     * Get a writer and path for a log starting at the given entry.
     *
     * This function is threadsafe so long as multiple threads are always
     * acting on different regions.
     *
     * @return null if this region shouldn't output any logs
     */
    WriterAndPath getWriterAndPath(Entry entry) throws IOException {
      byte[] region = entry.getKey().getEncodedRegionName();
      WriterAndPath ret = logWriters.get(region);
      if (ret != null) {
        return ret;
      }

      // If we already decided that this region gets no output,
      // don't check again
      if (blacklistedRegions.contains(region)) {
        return null;
      }

      // Need to create a new writer
      Path regionedits = getRegionSplitEditsPath(fs,
          entry, rootDir);
      if (regionedits == null) {
        // The region's directory doesn't exist; discard its edits
        blacklistedRegions.add(region);
        return null;
      }
      deletePreexistingOldEdits(regionedits);
      Writer w = createWriter(fs, regionedits, conf);
      ret = new WriterAndPath(regionedits, w);
      logWriters.put(region, ret);
      LOG.debug("Creating writer path=" + regionedits + " region="
          + Bytes.toStringBinary(region));

      return ret;
    }

    /**
     * If the given path exists, delete it. A file at this path could be
     * left over from a previous failed split attempt.
     */
    private void deletePreexistingOldEdits(Path regionedits) throws IOException {
      if (fs.exists(regionedits)) {
        LOG.warn("Found existing old edits file. It could be the "
            + "result of a previous failed split attempt. Deleting "
            + regionedits + ", length="
            + fs.getFileStatus(regionedits).getLen());
        if (!fs.delete(regionedits, false)) {
          LOG.warn("Failed delete of old " + regionedits);
        }
      }
    }

    /**
     * @return a map from encoded region ID to the number of edits written out
     * for that region
     */
    private Map<byte[], Long> getOutputCounts() {
      TreeMap<byte[], Long> ret = new TreeMap<byte[], Long>(
          Bytes.BYTES_COMPARATOR);
      synchronized (logWriters) {
        for (Map.Entry<byte[], WriterAndPath> entry : logWriters.entrySet()) {
          ret.put(entry.getKey(), entry.getValue().editsWritten);
        }
      }
      return ret;
    }
  }

  /**
   * Private data structure that wraps a Writer and its Path, also collecting
   * statistics about the data written to this output.
   */
  private final static class WriterAndPath {
    final Path p;
    final Writer w;

    /* Count of edits written to this path */
    long editsWritten = 0;
    /* Number of nanos spent writing to this log */
    long nanosSpent = 0;

    WriterAndPath(final Path p, final Writer w) {
      this.p = p;
      this.w = w;
    }

    void incrementEdits(int edits) {
      editsWritten += edits;
    }

    void incrementNanoTime(long nanos) {
      nanosSpent += nanos;
    }
  }
}