1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.util.ArrayList;
23
24 import org.apache.hadoop.hbase.HConstants;
25 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
26 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
27 import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
28 import org.apache.hadoop.hbase.util.Bytes;
29 import org.junit.Test;
30
31 import com.google.common.base.Joiner;
32 import com.google.common.base.Splitter;
33 import com.google.common.collect.Iterables;
34
35 import static org.junit.Assert.*;
36
37 public class TestImportTsv {
38 @Test
39 public void testTsvParserSpecParsing() {
40 TsvParser parser;
41
42 parser = new TsvParser("HBASE_ROW_KEY", "\t");
43 assertNull(parser.getFamily(0));
44 assertNull(parser.getQualifier(0));
45 assertEquals(0, parser.getRowKeyColumnIndex());
46
47 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
48 assertNull(parser.getFamily(0));
49 assertNull(parser.getQualifier(0));
50 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
51 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
52 assertEquals(0, parser.getRowKeyColumnIndex());
53
54 parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
55 assertNull(parser.getFamily(0));
56 assertNull(parser.getQualifier(0));
57 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
58 assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
59 assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
60 assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
61 assertEquals(0, parser.getRowKeyColumnIndex());
62 }
63
64 @Test
65 public void testTsvParser() throws BadTsvLineException {
66 TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
67 assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
68 assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
69 assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
70 assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
71 assertNull(parser.getFamily(2));
72 assertNull(parser.getQualifier(2));
73 assertEquals(2, parser.getRowKeyColumnIndex());
74
75 byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
76 ParsedLine parsed = parser.parse(line, line.length);
77 checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
78 }
79
80 private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
81 ArrayList<String> parsedCols = new ArrayList<String>();
82 for (int i = 0; i < parsed.getColumnCount(); i++) {
83 parsedCols.add(Bytes.toString(
84 parsed.getLineBytes(),
85 parsed.getColumnOffset(i),
86 parsed.getColumnLength(i)));
87 }
88 if (!Iterables.elementsEqual(parsedCols, expected)) {
89 fail("Expected: " + Joiner.on(",").join(expected) + "\n" +
90 "Got:" + Joiner.on(",").join(parsedCols));
91 }
92 }
93
94 private void assertBytesEquals(byte[] a, byte[] b) {
95 assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
96 }
97
98
99
100
101 @Test(expected=BadTsvLineException.class)
102 public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
103 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
104 byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
105 ParsedLine parsed = parser.parse(line, line.length);
106 }
107
108 @Test(expected=BadTsvLineException.class)
109 public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
110 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
111 byte[] line = Bytes.toBytes("");
112 ParsedLine parsed = parser.parse(line, line.length);
113 }
114
115 @Test(expected=BadTsvLineException.class)
116 public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
117 TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
118 byte[] line = Bytes.toBytes("key_only");
119 ParsedLine parsed = parser.parse(line, line.length);
120 }
121
122 @Test(expected=BadTsvLineException.class)
123 public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
124 TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
125 byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
126 ParsedLine parsed = parser.parse(line, line.length);
127 }
128 }