/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred.lib.db;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;

/**
 * @deprecated
 * Use {@link org.apache.hadoop.mapreduce.lib.db.DBInputFormat} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class DBInputFormat<T extends DBWritable>
    extends org.apache.hadoop.mapreduce.lib.db.DBInputFormat<T>
    implements InputFormat<LongWritable, T>, JobConfigurable {
  /**
   * A RecordReader that reads records from a SQL table.
   * Emits LongWritables containing the record number as
   * key and DBWritables as value.
   */
  protected class DBRecordReader extends
      org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>
      implements RecordReader<LongWritable, T> {
    /**
     * @param split The InputSplit to read data for
     * @throws SQLException
     */
    protected DBRecordReader(DBInputSplit split, Class<T> inputClass,
        JobConf job, Connection conn, DBConfiguration dbConfig, String cond,
        String [] fields, String table) throws SQLException {
      super(split, inputClass, job, conn, dbConfig, cond, fields, table);
    }

    /** {@inheritDoc} */
    public LongWritable createKey() {
      return new LongWritable();
    }

    /** {@inheritDoc} */
    public T createValue() {
      return super.createValue();
    }

    public long getPos() throws IOException {
      return super.getPos();
    }

    /** {@inheritDoc} */
    public boolean next(LongWritable key, T value) throws IOException {
      return super.next(key, value);
    }
  }
  /**
   * A RecordReader implementation that just passes through to a wrapped
   * RecordReader built with the new API.
   */
  private static class DBRecordReaderWrapper<T extends DBWritable>
      implements RecordReader<LongWritable, T> {

    private org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T> rr;

    public DBRecordReaderWrapper(
        org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T> inner) {
      this.rr = inner;
    }

    public void close() throws IOException {
      rr.close();
    }

    public LongWritable createKey() {
      return new LongWritable();
    }

    public T createValue() {
      return rr.createValue();
    }

    public float getProgress() throws IOException {
      return rr.getProgress();
    }

    public long getPos() throws IOException {
      return rr.getPos();
    }

    public boolean next(LongWritable key, T value) throws IOException {
      return rr.next(key, value);
    }
  }

  /**
   * A class that does nothing, implementing DBWritable.
   */
  public static class NullDBWritable extends
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable
      implements DBWritable, Writable {
  }

  /**
   * An InputSplit that spans a set of rows.
   */
  protected static class DBInputSplit extends
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit
      implements InputSplit {
    /**
     * Default constructor.
     */
    public DBInputSplit() {
    }

    /**
     * Convenience constructor.
     * @param start the index of the first row to select
     * @param end the index of the last row to select
     */
    public DBInputSplit(long start, long end) {
      super(start, end);
    }
  }

  /** {@inheritDoc} */
  public void configure(JobConf job) {
    super.setConf(job);
  }

  /** {@inheritDoc} */
  @SuppressWarnings("unchecked")
  public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
      JobConf job, Reporter reporter) throws IOException {

    // Wrap the new-API DBRecordReader in a shim class to deal with API differences.
    return new DBRecordReaderWrapper<T>(
        (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>)
        createDBRecordReader(
            (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job));
  }

  /** {@inheritDoc} */
  public InputSplit[] getSplits(JobConf job, int chunks) throws IOException {
    List<org.apache.hadoop.mapreduce.InputSplit> newSplits =
        super.getSplits(new Job(job));
    InputSplit[] ret = new InputSplit[newSplits.size()];
    int i = 0;
    for (org.apache.hadoop.mapreduce.InputSplit s : newSplits) {
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit split =
          (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) s;
      ret[i++] = new DBInputSplit(split.getStart(), split.getEnd());
    }
    return ret;
  }

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
   * @param job The job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param tableName The table to read data from
   * @param conditions The condition used to select data, e.g.
   * '(updated > 20070101 AND length > 0)'
   * @param orderBy the field names in the ORDER BY clause.
   * @param fieldNames The field names in the table
   * @see #setInput(JobConf, Class, String, String)
   */
  public static void setInput(JobConf job, Class<? extends DBWritable> inputClass,
      String tableName, String conditions, String orderBy, String... fieldNames) {
    job.setInputFormat(DBInputFormat.class);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setInputClass(inputClass);
    dbConf.setInputTableName(tableName);
    dbConf.setInputFieldNames(fieldNames);
    dbConf.setInputConditions(conditions);
    dbConf.setInputOrderBy(orderBy);
  }
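
  // A minimal usage sketch (not part of the original class): a driver might wire up
  // table-based input roughly as below. "MyDriver", "MyRecord" (a user-written
  // DBWritable), and the table/column names are hypothetical; the connection
  // settings are supplied through DBConfiguration.configureDB.
  //
  //   JobConf job = new JobConf(MyDriver.class);
  //   DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
  //       "jdbc:mysql://localhost/mydb", "user", "password");
  //   DBInputFormat.setInput(job, MyRecord.class, "employees",
  //       "salary > 0", "id", "id", "name", "salary");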

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
   * @param job The job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param inputQuery the input query to select fields. Example:
   * "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
   * @param inputCountQuery the input query that returns the number of records in
   * the table. Example: "SELECT COUNT(f1) FROM Mytable"
   * @see #setInput(JobConf, Class, String, String, String, String...)
   */
  public static void setInput(JobConf job, Class<? extends DBWritable> inputClass,
      String inputQuery, String inputCountQuery) {
    job.setInputFormat(DBInputFormat.class);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setInputClass(inputClass);
    dbConf.setInputQuery(inputQuery);
    dbConf.setInputCountQuery(inputCountQuery);
  }
}
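
// A corresponding sketch for the query-based setInput overload above (illustrative
// only; "MyDriver", "MyRecord", and the SQL are hypothetical). The count query is
// expected to return the number of rows the input query selects, since that count
// is what the input splits are computed from.
//
//   JobConf job = new JobConf(MyDriver.class);
//   DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
//       "jdbc:mysql://localhost/mydb", "user", "password");
//   DBInputFormat.setInput(job, MyRecord.class,
//       "SELECT id, name, salary FROM employees ORDER BY id",
//       "SELECT COUNT(id) FROM employees");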