/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
 *
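 * <p>A minimal job-setup sketch; the table name <code>"mytable"</code> is
 * illustrative, and in practice
 * {@link TableMapReduceUtil#initTableReducerJob} performs this wiring:
 * <pre>
 *   Job job = new Job(conf, "write to mytable");
 *   job.setOutputFormatClass(TableOutputFormat.class);
 *   job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "mytable");
 * </pre>
 *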
 * @param <KEY>  The type of the key. Ignored in this class.
 */
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Writable>
implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Optional job parameter that specifies a peer cluster.
   * Used to specify a remote cluster when copying between HBase clusters (the
   * source cluster is picked up from <code>hbase-site.xml</code>).
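   * <p>The value is a cluster key of the form
   * <code>&lt;quorum servers&gt;:&lt;client port&gt;:&lt;znode parent&gt;</code>,
   * for example <code>server1,server2,server3:2181:/hbase</code> (the hosts
   * and port here are illustrative).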
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = "hbase.mapred.output.quorum";

  /** Optional job parameter that specifies the region server class name of the peer cluster. */
  public static final String
      REGION_SERVER_CLASS = "hbase.mapred.output.rs.class";
  /** Optional job parameter that specifies the region server implementation name of the peer cluster. */
  public static final String
      REGION_SERVER_IMPL = "hbase.mapred.output.rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  private HTable table;

  /**
   * Writes the reducer output to an HBase table.
   *
   * @param <KEY>  The type of the key.
   */
  protected static class TableRecordWriter<KEY>
  extends RecordWriter<KEY, Writable> {

    /** The table to write to. */
    private HTable table;

    /**
     * Instantiates a TableRecordWriter with an HTable instance for writing.
     *
     * @param table  The table to write to.
     */
    public TableRecordWriter(HTable table) {
      this.table = table;
    }

    /**
     * Closes the writer, in this case flushing any buffered table commits.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context)
    throws IOException {
      table.flushCommits();
      // The following call shuts down all connections to the cluster from
      // this JVM. It also closes out our ZooKeeper session; otherwise
      // ZooKeeper would log expired sessions rather than closed ones. If any
      // other HTable instance is running in this JVM, this call will break
      // it. The presumption is that this.table is the only instance.
      HConnectionManager.deleteAllConnections(true);
    }

    /**
     * Writes a key/value pair into the table.
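     * A reducer typically reaches this method via
     * <code>context.write(key, put)</code>; the key itself is ignored and
     * only the {@link Put} or {@link Delete} value is applied (the names
     * here are illustrative).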
     *
     * @param key  The key.
     * @param value  The value.
     * @throws IOException When writing fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
     */
    @Override
    public void write(KEY key, Writable value)
    throws IOException {
      if (value instanceof Put) {
        this.table.put(new Put((Put) value));
      } else if (value instanceof Delete) {
        this.table.delete(new Delete((Delete) value));
      } else {
        throw new IOException("Pass a Delete or a Put");
      }
    }
  }

  /**
   * Creates a new record writer.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public RecordWriter<KEY, Writable> getRecordWriter(
    TaskAttemptContext context)
  throws IOException, InterruptedException {
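    // Every writer handed out by this format shares the single HTable
    // instance created in setConf(), and with it that table's write buffer.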
    return new TableRecordWriter<KEY>(this.table);
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException,
      InterruptedException {
    // TODO Check if the table exists?

  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
  throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    String tableName = conf.get(OUTPUT_TABLE);
    String address = conf.get(QUORUM_ADDRESS);
    String serverClass = conf.get(REGION_SERVER_CLASS);
    String serverImpl = conf.get(REGION_SERVER_IMPL);
    try {
      if (address != null) {
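        // Repoint this configuration's ZooKeeper settings at the peer
        // cluster named by the cluster key.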
        ZKUtil.applyClusterKeyToConf(conf, address);
      }
      // Both the class and impl names must be supplied together; setting a
      // null value into the configuration would throw.
      if (serverClass != null && serverImpl != null) {
        conf.set(HConstants.REGION_SERVER_CLASS, serverClass);
        conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      this.table = new HTable(conf, tableName);
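      // Disable autoflush so puts are buffered client-side; they are
      // flushed by TableRecordWriter.close().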
      table.setAutoFlush(false);
      LOG.info("Created table instance for " + tableName);
    } catch (IOException e) {
      LOG.error(e);
      // Surface the failure instead of leaving this.table null, which would
      // otherwise cause a NullPointerException in getRecordWriter().
      throw new RuntimeException(e);
    }
  }
}