1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import org.apache.hadoop.conf.Configuration;
23 import org.apache.hadoop.hbase.HBaseConfiguration;
24 import org.apache.hadoop.hbase.HConstants;
25 import org.apache.hadoop.hbase.util.Bytes;
26 import org.apache.hadoop.hbase.client.Scan;
27 import org.apache.hadoop.mapreduce.Job;
28 import org.apache.hadoop.util.GenericOptionsParser;
29
30 import java.io.IOException;
31 import java.util.HashMap;
32 import java.util.Map;
33
34
35
36
37
38
39 public class CopyTable {
40
41 final static String NAME = "copytable";
42 static long startTime = 0;
43 static long endTime = 0;
44 static int versions = -1;
45 static String tableName = null;
46 static String newTableName = null;
47 static String peerAddress = null;
48 static String families = null;
49 static boolean allCells = false;
50
51
52
53
54
55
56
57
58
59 public static Job createSubmittableJob(Configuration conf, String[] args)
60 throws IOException {
61 if (!doCommandLine(args)) {
62 return null;
63 }
64 Job job = new Job(conf, NAME + "_" + tableName);
65 job.setJarByClass(CopyTable.class);
66 Scan scan = new Scan();
67 scan.setCacheBlocks(false);
68 if (startTime != 0) {
69 scan.setTimeRange(startTime,
70 endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
71 }
72 if (allCells) {
73 scan.setRaw(true);
74 }
75 if (versions >= 0) {
76 scan.setMaxVersions(versions);
77 }
78 if(families != null) {
79 String[] fams = families.split(",");
80 Map<String,String> cfRenameMap = new HashMap<String,String>();
81 for(String fam : fams) {
82 String sourceCf;
83 if(fam.contains(":")) {
84
85 String[] srcAndDest = fam.split(":", 2);
86 sourceCf = srcAndDest[0];
87 String destCf = srcAndDest[1];
88 cfRenameMap.put(sourceCf, destCf);
89 } else {
90
91 sourceCf = fam;
92 }
93 scan.addFamily(Bytes.toBytes(sourceCf));
94 }
95 Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
96 }
97 TableMapReduceUtil.initTableMapperJob(tableName, scan,
98 Import.Importer.class, null, null, job);
99 TableMapReduceUtil.initTableReducerJob(
100 newTableName == null ? tableName : newTableName, null, job,
101 null, peerAddress, null, null);
102 job.setNumReduceTasks(0);
103 return job;
104 }
105
106
107
108
109 private static void printUsage(final String errorMsg) {
110 if (errorMsg != null && errorMsg.length() > 0) {
111 System.err.println("ERROR: " + errorMsg);
112 }
113 System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
114 "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
115 System.err.println();
116 System.err.println("Options:");
117 System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
118 System.err.println(" specify if different from current cluster");
119 System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster");
120 System.err.println(" starttime beginning of the time range (unixtime in millis)");
121 System.err.println(" without endtime means from starttime to forever");
122 System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
123 System.err.println(" versions number of cell versions to copy");
124 System.err.println(" new.name new table's name");
125 System.err.println(" peer.adr Address of the peer cluster given in the format");
126 System.err.println(" hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
127 System.err.println(" families comma-separated list of families to copy");
128 System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
129 System.err.println(" To keep the same name, just give \"cfName\"");
130 System.err.println(" all.cells also copy delete markers and deleted cells");
131 System.err.println();
132 System.err.println("Args:");
133 System.err.println(" tablename Name of the table to copy");
134 System.err.println();
135 System.err.println("Examples:");
136 System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
137 System.err.println(" $ bin/hbase " +
138 "org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 " +
139 "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
140 System.err.println("For performance consider the following general options:\n"
141 + "-Dhbase.client.scanner.caching=100\n"
142 + "-Dmapred.map.tasks.speculative.execution=false");
143 }
144
145 private static boolean doCommandLine(final String[] args) {
146
147
148 if (args.length < 1) {
149 printUsage(null);
150 return false;
151 }
152 try {
153 for (int i = 0; i < args.length; i++) {
154 String cmd = args[i];
155 if (cmd.equals("-h") || cmd.startsWith("--h")) {
156 printUsage(null);
157 return false;
158 }
159
160 final String startTimeArgKey = "--starttime=";
161 if (cmd.startsWith(startTimeArgKey)) {
162 startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
163 continue;
164 }
165
166 final String endTimeArgKey = "--endtime=";
167 if (cmd.startsWith(endTimeArgKey)) {
168 endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
169 continue;
170 }
171
172 final String versionsArgKey = "--versions=";
173 if (cmd.startsWith(versionsArgKey)) {
174 versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
175 continue;
176 }
177
178 final String newNameArgKey = "--new.name=";
179 if (cmd.startsWith(newNameArgKey)) {
180 newTableName = cmd.substring(newNameArgKey.length());
181 continue;
182 }
183
184 final String peerAdrArgKey = "--peer.adr=";
185 if (cmd.startsWith(peerAdrArgKey)) {
186 peerAddress = cmd.substring(peerAdrArgKey.length());
187 continue;
188 }
189
190 final String familiesArgKey = "--families=";
191 if (cmd.startsWith(familiesArgKey)) {
192 families = cmd.substring(familiesArgKey.length());
193 continue;
194 }
195
196 if (cmd.startsWith("--all.cells")) {
197 allCells = true;
198 continue;
199 }
200
201 if (i == args.length-1) {
202 tableName = cmd;
203 } else {
204 printUsage("Invalid argument '" + cmd + "'" );
205 return false;
206 }
207 }
208 if (newTableName == null && peerAddress == null) {
209 printUsage("At least a new table name or a " +
210 "peer address must be specified");
211 return false;
212 }
213 if (startTime > endTime) {
214 printUsage("Invalid time range filter: starttime=" + startTime + " > endtime=" + endTime);
215 return false;
216 }
217 } catch (Exception e) {
218 e.printStackTrace();
219 printUsage("Can't start because " + e.getMessage());
220 return false;
221 }
222 return true;
223 }
224
225
226
227
228
229
230
231 public static void main(String[] args) throws Exception {
232 Configuration conf = HBaseConfiguration.create();
233 String[] otherArgs =
234 new GenericOptionsParser(conf, args).getRemainingArgs();
235 Job job = createSubmittableJob(conf, otherArgs);
236 if (job != null) {
237 System.exit(job.waitForCompletion(true) ? 0 : 1);
238 }
239 }
240 }