1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.classification.InterfaceStability;
25  import org.apache.hadoop.hbase.client.HTable;
26  import org.apache.hadoop.hbase.client.Result;
27  import org.apache.hadoop.hbase.client.Scan;
28  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
29  import org.apache.hadoop.mapreduce.InputSplit;
30  import org.apache.hadoop.mapreduce.RecordReader;
31  import org.apache.hadoop.mapreduce.TaskAttemptContext;
32  
33  /**
34   * Iterate over an HBase table data, return (ImmutableBytesWritable, Result)
35   * pairs.
36   */
37  @InterfaceAudience.Public
38  @InterfaceStability.Stable
39  public class TableRecordReader
40  extends RecordReader<ImmutableBytesWritable, Result> {
41  
42    private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();
43  
44    /**
45     * Restart from survivable exceptions by creating a new scanner.
46     *
47     * @param firstRow  The first row to start at.
48     * @throws IOException When restarting fails.
49     */
50    public void restart(byte[] firstRow) throws IOException {
51      this.recordReaderImpl.restart(firstRow);
52    }
53  
54  
55    /**
56     * Sets the HBase table.
57     *
58     * @param htable  The {@link HTable} to scan.
59     */
60    public void setHTable(HTable htable) {
61      this.recordReaderImpl.setHTable(htable);
62    }
63  
64    /**
65     * Sets the scan defining the actual details like columns etc.
66     *
67     * @param scan  The scan to set.
68     */
69    public void setScan(Scan scan) {
70      this.recordReaderImpl.setScan(scan);
71    }
72  
73    /**
74     * Closes the split.
75     *
76     * @see org.apache.hadoop.mapreduce.RecordReader#close()
77     */
78    @Override
79    public void close() {
80      this.recordReaderImpl.close();
81    }
82  
83    /**
84     * Returns the current key.
85     *
86     * @return The current key.
87     * @throws IOException
88     * @throws InterruptedException When the job is aborted.
89     * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey()
90     */
91    @Override
92    public ImmutableBytesWritable getCurrentKey() throws IOException,
93        InterruptedException {
94      return this.recordReaderImpl.getCurrentKey();
95    }
96  
97    /**
98     * Returns the current value.
99     *
100    * @return The current value.
101    * @throws IOException When the value is faulty.
102    * @throws InterruptedException When the job is aborted.
103    * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
104    */
105   @Override
106   public Result getCurrentValue() throws IOException, InterruptedException {
107     return this.recordReaderImpl.getCurrentValue();
108   }
109 
110   /**
111    * Initializes the reader.
112    *
113    * @param inputsplit  The split to work with.
114    * @param context  The current task context.
115    * @throws IOException When setting up the reader fails.
116    * @throws InterruptedException When the job is aborted.
117    * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
118    *   org.apache.hadoop.mapreduce.InputSplit,
119    *   org.apache.hadoop.mapreduce.TaskAttemptContext)
120    */
121   @Override
122   public void initialize(InputSplit inputsplit,
123       TaskAttemptContext context) throws IOException,
124       InterruptedException {
125     this.recordReaderImpl.initialize(inputsplit, context);
126   }
127 
128   /**
129    * Positions the record reader to the next record.
130    *
131    * @return <code>true</code> if there was another record.
132    * @throws IOException When reading the record failed.
133    * @throws InterruptedException When the job was aborted.
134    * @see org.apache.hadoop.mapreduce.RecordReader#nextKeyValue()
135    */
136   @Override
137   public boolean nextKeyValue() throws IOException, InterruptedException {
138     return this.recordReaderImpl.nextKeyValue();
139   }
140 
141   /**
142    * The current progress of the record reader through its data.
143    *
144    * @return A number between 0.0 and 1.0, the fraction of the data read.
145    * @see org.apache.hadoop.mapreduce.RecordReader#getProgress()
146    */
147   @Override
148   public float getProgress() {
149     return this.recordReaderImpl.getProgress();
150   }
151 }