/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(MediumTests.class)
public class TestImportExport {
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final byte[] ROW1 = Bytes.toBytes("row1");
  private static final byte[] ROW2 = Bytes.toBytes("row2");
  private static final String FAMILYA_STRING = "a";
  private static final String FAMILYB_STRING = "b";
  private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
  private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
  private static final byte[] QUAL = Bytes.toBytes("q");
  private static final String OUTPUT_DIR = "outputdir";

  private static MiniHBaseCluster cluster;
  private static long now = System.currentTimeMillis();

  @BeforeClass
  public static void beforeClass() throws Exception {
    cluster = UTIL.startMiniCluster();
    UTIL.startMiniMapReduceCluster();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }

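  // MapReduce output formats refuse to write into a directory that already
  // exists, so clear the shared output directory before and after every test.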
  @Before
  @After
  public void cleanup() throws Exception {
    FileSystem fs = FileSystem.get(UTIL.getConfiguration());
    fs.delete(new Path(OUTPUT_DIR), true);
  }

  /**
   * Test a simple export followed by an import with a column family rename
   * @throws Exception
   */
  @Test
  public void testSimpleCase() throws Exception {
    String EXPORT_TABLE = "exportSimpleCase";
    HTable t = UTIL.createTable(Bytes.toBytes(EXPORT_TABLE), FAMILYA);
    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    t.put(p);
    p = new Put(ROW2);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    t.put(p);

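    // Export positional args: <tablename> <outputdir> [<versions>]; "1000"
    // requests up to 1000 versions of each cell.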
    String[] args = new String[] {
        EXPORT_TABLE,
        OUTPUT_DIR,
        "1000"
    };

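    // GenericOptionsParser absorbs the generic -D options into the
    // Configuration and hands back the remaining positional arguments.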
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
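    // Run against the mini MapReduce cluster's YARN framework rather than the
    // local job runner.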
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    String IMPORT_TABLE = "importTableSimpleCase";
    t = UTIL.createTable(Bytes.toBytes(IMPORT_TABLE), FAMILYB);
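    // CF_RENAME_PROP remaps families on import; "a:b" rewrites cells from
    // family "a" into family "b".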
    args = new String[] {
        "-D" + Import.CF_RENAME_PROP + "=" + FAMILYA_STRING + ":" + FAMILYB_STRING,
        IMPORT_TABLE,
        OUTPUT_DIR
    };

    opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

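    // Both rows should have landed in the renamed family with all three
    // versions intact.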
    Get g = new Get(ROW1);
    g.setMaxVersions();
    Result r = t.get(g);
    assertEquals(3, r.size());
    g = new Get(ROW2);
    g.setMaxVersions();
    r = t.get(g);
    assertEquals(3, r.size());
  }

  /**
   * Test exporting the .META. catalog table
   * @throws Exception
   */
  @Test
  public void testMetaExport() throws Exception {
    String EXPORT_TABLE = ".META.";
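    // Positional args: <tablename> <outputdir> <versions> <starttime> <endtime>.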
    String[] args = new String[] { EXPORT_TABLE, OUTPUT_DIR, "1", "0", "0" };
    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());
  }

  @Test
  public void testWithDeletes() throws Exception {
    String EXPORT_TABLE = "exportWithDeletes";
    HTableDescriptor desc = new HTableDescriptor(EXPORT_TABLE);
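    // Keep deleted cells and extra versions so a raw export can capture both
    // the puts and the delete markers.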
    desc.addFamily(new HColumnDescriptor(FAMILYA)
        .setMaxVersions(5)
        .setKeepDeletedCells(true)
    );
    UTIL.getHBaseAdmin().createTable(desc);
    HTable t = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);

    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    p.add(FAMILYA, QUAL, now + 3, QUAL);
    p.add(FAMILYA, QUAL, now + 4, QUAL);
    t.put(p);

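    // Lay down two kinds of delete markers: a whole-row delete at now + 3
    // (a family delete marker) and a column delete covering all versions of
    // the column at or before now + 2.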
    Delete d = new Delete(ROW1, now + 3, null);
    t.delete(d);
    d = new Delete(ROW1);
    d.deleteColumns(FAMILYA, QUAL, now + 2);
    t.delete(d);

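    // RAW_SCAN makes the export include delete markers and deleted cells
    // rather than only live data.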
    String[] args = new String[] {
        "-D" + Export.RAW_SCAN + "=true",
        EXPORT_TABLE,
        OUTPUT_DIR,
        "1000"
    };

    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    String IMPORT_TABLE = "importWithDeletes";
    desc = new HTableDescriptor(IMPORT_TABLE);
    desc.addFamily(new HColumnDescriptor(FAMILYA)
        .setMaxVersions(5)
        .setKeepDeletedCells(true)
    );
    UTIL.getHBaseAdmin().createTable(desc);
    t.close();
    t = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
    args = new String[] {
        IMPORT_TABLE,
        OUTPUT_DIR
    };

    opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

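    // A raw scan returns delete markers inline with the data. Within the row,
    // the family delete marker sorts first, and the column delete marker sorts
    // ahead of the put that shares its timestamp.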
    Scan s = new Scan();
    s.setMaxVersions();
    s.setRaw(true);
    ResultScanner scanner = t.getScanner(s);
    Result r = scanner.next();
    KeyValue[] res = r.raw();
    assertTrue(res[0].isDeleteFamily());
    assertEquals(now + 4, res[1].getTimestamp());
    assertEquals(now + 3, res[2].getTimestamp());
    assertTrue(res[3].isDelete());
    assertEquals(now + 2, res[4].getTimestamp());
    assertEquals(now + 1, res[5].getTimestamp());
    assertEquals(now, res[6].getTimestamp());
    t.close();
  }

  @Test
  public void testWithFilter() throws Exception {
    String EXPORT_TABLE = "exportSimpleCase_ImportWithFilter";
    HTableDescriptor desc = new HTableDescriptor(EXPORT_TABLE);
    desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
    UTIL.getHBaseAdmin().createTable(desc);
    HTable exportTable = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);

    Put p = new Put(ROW1);
    p.add(FAMILYA, QUAL, now, QUAL);
    p.add(FAMILYA, QUAL, now + 1, QUAL);
    p.add(FAMILYA, QUAL, now + 2, QUAL);
    p.add(FAMILYA, QUAL, now + 3, QUAL);
    p.add(FAMILYA, QUAL, now + 4, QUAL);
    exportTable.put(p);

    String[] args = new String[] { EXPORT_TABLE, OUTPUT_DIR, "1000" };

    GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    Configuration conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    Job job = Export.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    String IMPORT_TABLE = "importWithFilter";
    desc = new HTableDescriptor(IMPORT_TABLE);
    desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
    UTIL.getHBaseAdmin().createTable(desc);

    HTable importTable = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
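    // Import only the rows matching a PrefixFilter on ROW1; the filter class
    // and its constructor arguments are passed through -D properties.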
    args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
        "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE, OUTPUT_DIR,
        "1000" };

    opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertTrue(job.isSuccessful());

    // get the count of the source table with the same filter
    PrefixFilter filter = new PrefixFilter(ROW1);
    int count = getCount(exportTable, filter);

    Assert.assertEquals("Unexpected row count between export and import tables", count,
      getCount(importTable, null));

    // and then test that a broken command doesn't bork everything - easier here because we don't
    // need to re-run the export job

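    // Filter itself is not a concrete, instantiable filter implementation, so
    // the import should fail rather than silently import everything.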
    args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
        "-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), EXPORT_TABLE,
        OUTPUT_DIR, "1000" };

    opts = new GenericOptionsParser(new Configuration(
        cluster.getConfiguration()), args);
    conf = opts.getConfiguration();
    args = opts.getRemainingArgs();

    job = Import.createSubmittableJob(conf, args);
    job.getConfiguration().set("mapreduce.framework.name", "yarn");
    job.waitForCompletion(false);
    assertFalse("Job succeeded, but it had a non-instantiable filter!", job.isSuccessful());

    // cleanup
    exportTable.close();
    importTable.close();
  }

  /**
   * Count the number of KeyValues in the specified table that pass the given filter
   * @param table the table to scan
   * @param filter the filter to apply, or null to count everything
   * @return the number of KeyValues found
   * @throws IOException
   */
  private int getCount(HTable table, Filter filter) throws IOException {
    Scan scan = new Scan();
    scan.setFilter(filter);
    ResultScanner results = table.getScanner(scan);
    int count = 0;
    for (Result res : results) {
      count += res.size();
    }
    results.close();
    return count;
  }
}