/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred.lib.db;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;

/**
 * An {@link InputFormat} that reads records from a SQL table, emitting
 * {@link LongWritable} keys (the record number) and {@link DBWritable} values.
 *
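 * <p>A mapper consuming this input might look like the following sketch;
 * {@code MyRecord} stands for an illustrative {@link DBWritable} implementation
 * and the usual {@code org.apache.hadoop.mapred} mapper imports are assumed:
 * <pre>{@code
 * public static class MyMapper extends MapReduceBase
 *     implements Mapper<LongWritable, MyRecord, Text, NullWritable> {
 *   public void map(LongWritable recordNum, MyRecord record,
 *       OutputCollector<Text, NullWritable> out, Reporter reporter)
 *       throws IOException {
 *     out.collect(new Text(record.toString()), NullWritable.get());
 *   }
 * }
 * }</pre>
 *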
 * @deprecated
 * Use {@link org.apache.hadoop.mapreduce.lib.db.DBInputFormat} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class DBInputFormat<T extends DBWritable>
    extends org.apache.hadoop.mapreduce.lib.db.DBInputFormat<T>
    implements InputFormat<LongWritable, T>, JobConfigurable {
  /**
   * A RecordReader that reads records from a SQL table.
   * Emits LongWritable keys containing the record number and
   * DBWritable values.
   */
  protected class DBRecordReader extends
      org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>
      implements RecordReader<LongWritable, T> {
    /**
     * @param split The InputSplit to read data for
     * @param inputClass the class of the {@link DBWritable} records to read
     * @param job the job configuration
     * @param conn an open connection to the database
     * @param dbConfig the database configuration for the job
     * @param cond the condition used to select records, if any
     * @param fields the field names to select
     * @param table the table to read from
     * @throws SQLException if a database access error occurs
     */
    protected DBRecordReader(DBInputSplit split, Class<T> inputClass,
        JobConf job, Connection conn, DBConfiguration dbConfig, String cond,
        String[] fields, String table) throws SQLException {
      super(split, inputClass, job, conn, dbConfig, cond, fields, table);
    }

    /** {@inheritDoc} */
    public LongWritable createKey() {
      return new LongWritable();
    }

    /** {@inheritDoc} */
    public T createValue() {
      return super.createValue();
    }

    /** {@inheritDoc} */
    public long getPos() throws IOException {
      return super.getPos();
    }

    /** {@inheritDoc} */
    public boolean next(LongWritable key, T value) throws IOException {
      return super.next(key, value);
    }
  }

  /**
   * A RecordReader implementation that just passes through to a wrapped
   * RecordReader built with the new API.
   */
  private static class DBRecordReaderWrapper<T extends DBWritable>
      implements RecordReader<LongWritable, T> {

    private org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T> rr;

    public DBRecordReaderWrapper(
        org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T> inner) {
      this.rr = inner;
    }

    public void close() throws IOException {
      rr.close();
    }

    public LongWritable createKey() {
      return new LongWritable();
    }

    public T createValue() {
      return rr.createValue();
    }

    public float getProgress() throws IOException {
      return rr.getProgress();
    }

    public long getPos() throws IOException {
      return rr.getPos();
    }

    public boolean next(LongWritable key, T value) throws IOException {
      return rr.next(key, value);
    }
  }

  /**
   * A {@link DBWritable} and {@link Writable} implementation that does nothing.
   */
  public static class NullDBWritable extends
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable
      implements DBWritable, Writable {
  }

  /**
   * An InputSplit that spans a set of rows.
   */
  protected static class DBInputSplit extends
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit
      implements InputSplit {
    /**
     * Default constructor.
     */
    public DBInputSplit() {
    }

    /**
     * Convenience constructor.
     * @param start the index of the first row to select
     * @param end the index of the last row to select
     */
    public DBInputSplit(long start, long end) {
      super(start, end);
    }
  }

  /** {@inheritDoc} */
  public void configure(JobConf job) {
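    // JobConf extends Configuration, so the old-API job configuration can be
    // handed directly to the new-API base class.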
    super.setConf(job);
  }

  /** {@inheritDoc} */
  @SuppressWarnings("unchecked")
  public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
      JobConf job, Reporter reporter) throws IOException {

    // Wrap the new-API DBRecordReader in a shim class to bridge the API differences.
    return new DBRecordReaderWrapper<T>(
        (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>)
        createDBRecordReader(
            (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job));
  }

  /** {@inheritDoc} */
  public InputSplit[] getSplits(JobConf job, int chunks) throws IOException {
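    // Delegate split computation to the new-API implementation, then re-wrap
    // each result as an old-API DBInputSplit covering the same row range.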
    List<org.apache.hadoop.mapreduce.InputSplit> newSplits =
        super.getSplits(new Job(job));
    InputSplit[] ret = new InputSplit[newSplits.size()];
    int i = 0;
    for (org.apache.hadoop.mapreduce.InputSplit s : newSplits) {
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit split =
          (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) s;
      ret[i++] = new DBInputSplit(split.getStart(), split.getEnd());
    }
    return ret;
  }

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
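   * <p>A minimal usage sketch; the JDBC driver, connection string, credentials,
   * {@code MyRecord} writable, job class, table, and field names shown are
   * purely illustrative:
   * <pre>{@code
   * JobConf job = new JobConf(MyJob.class);
   * DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
   *     "jdbc:mysql://localhost/mydb", "user", "password");
   * DBInputFormat.setInput(job, MyRecord.class, "employees",
   *     "salary > 0", "id", "id", "name", "salary");
   * }</pre>
   *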
   * @param job The job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param tableName The table to read data from
   * @param conditions The condition used to select data, e.g.
   * '(updated > 20070101 AND length > 0)'
   * @param orderBy the field names used in the ORDER BY clause
   * @param fieldNames The field names in the table
   * @see #setInput(JobConf, Class, String, String)
   */
  public static void setInput(JobConf job, Class<? extends DBWritable> inputClass,
      String tableName, String conditions, String orderBy, String... fieldNames) {
    job.setInputFormat(DBInputFormat.class);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setInputClass(inputClass);
    dbConf.setInputTableName(tableName);
    dbConf.setInputFieldNames(fieldNames);
    dbConf.setInputConditions(conditions);
    dbConf.setInputOrderBy(orderBy);
  }

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
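   * <p>For example, given a {@link JobConf} {@code job} already configured via
   * {@code DBConfiguration.configureDB}; {@code MyRecord} and the queries shown
   * are illustrative:
   * <pre>{@code
   * DBInputFormat.setInput(job, MyRecord.class,
   *     "SELECT id, name, salary FROM employees ORDER BY id",
   *     "SELECT COUNT(id) FROM employees");
   * }</pre>
   *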
   * @param job The job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param inputQuery the input query to select fields. Example:
   * "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
   * @param inputCountQuery the input query that returns the number of records in
   * the table. Example: "SELECT COUNT(f1) FROM Mytable"
   * @see #setInput(JobConf, Class, String, String, String, String...)
   */
  public static void setInput(JobConf job, Class<? extends DBWritable> inputClass,
      String inputQuery, String inputCountQuery) {
    job.setInputFormat(DBInputFormat.class);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setInputClass(inputClass);
    dbConf.setInputQuery(inputQuery);
    dbConf.setInputCountQuery(inputCountQuery);
  }
}